From 56079431b6ba163df8ba26b3eccc82379f0c0ce4 Mon Sep 17 00:00:00 2001 From: Denis Vlasenko Date: Wed, 29 Mar 2006 15:57:29 -0800 Subject: [NET]: Deinline some larger functions from netdevice.h On a allyesconfig'ured kernel: Size Uses Wasted Name and definition ===== ==== ====== ================================================ 95 162 12075 netif_wake_queue include/linux/netdevice.h 129 86 9265 dev_kfree_skb_any include/linux/netdevice.h 127 56 5885 netif_device_attach include/linux/netdevice.h 73 86 4505 dev_kfree_skb_irq include/linux/netdevice.h 46 60 1534 netif_device_detach include/linux/netdevice.h 119 16 1485 __netif_rx_schedule include/linux/netdevice.h 143 5 492 netif_rx_schedule include/linux/netdevice.h 81 7 366 netif_schedule include/linux/netdevice.h netif_wake_queue is big because __netif_schedule is a big inline: static inline void __netif_schedule(struct net_device *dev) { if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) { unsigned long flags; struct softnet_data *sd; local_irq_save(flags); sd = &__get_cpu_var(softnet_data); dev->next_sched = sd->output_queue; sd->output_queue = dev; raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); } } static inline void netif_wake_queue(struct net_device *dev) { #ifdef CONFIG_NETPOLL_TRAP if (netpoll_trap()) return; #endif if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state)) __netif_schedule(dev); } By de-inlining __netif_schedule we are saving a lot of text at each callsite of netif_wake_queue and netif_schedule. __netif_rx_schedule is also big, and it makes more sense to keep both of them out of line. Patch also deinlines dev_kfree_skb_any. We can deinline dev_kfree_skb_irq instead... oh well. netif_device_attach/detach are not hot paths, we can deinline them too. Signed-off-by: Denis Vlasenko Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- include/linux/netdevice.h | 55 +++++------------------------------------------ 1 file changed, 5 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 950dc55e5192..40ccf8cc4239 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -598,20 +598,7 @@ DECLARE_PER_CPU(struct softnet_data,softnet_data); #define HAVE_NETIF_QUEUE -static inline void __netif_schedule(struct net_device *dev) -{ - if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) { - unsigned long flags; - struct softnet_data *sd; - - local_irq_save(flags); - sd = &__get_cpu_var(softnet_data); - dev->next_sched = sd->output_queue; - sd->output_queue = dev; - raise_softirq_irqoff(NET_TX_SOFTIRQ); - local_irq_restore(flags); - } -} +extern void __netif_schedule(struct net_device *dev); static inline void netif_schedule(struct net_device *dev) { @@ -675,13 +662,7 @@ static inline void dev_kfree_skb_irq(struct sk_buff *skb) /* Use this variant in places where it could be invoked * either from interrupt or non-interrupt context. 
*/ -static inline void dev_kfree_skb_any(struct sk_buff *skb) -{ - if (in_irq() || irqs_disabled()) - dev_kfree_skb_irq(skb); - else - dev_kfree_skb(skb); -} +extern void dev_kfree_skb_any(struct sk_buff *skb); #define HAVE_NETIF_RX 1 extern int netif_rx(struct sk_buff *skb); @@ -768,22 +749,9 @@ static inline int netif_device_present(struct net_device *dev) return test_bit(__LINK_STATE_PRESENT, &dev->state); } -static inline void netif_device_detach(struct net_device *dev) -{ - if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) && - netif_running(dev)) { - netif_stop_queue(dev); - } -} +extern void netif_device_detach(struct net_device *dev); -static inline void netif_device_attach(struct net_device *dev) -{ - if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) && - netif_running(dev)) { - netif_wake_queue(dev); - __netdev_watchdog_up(dev); - } -} +extern void netif_device_attach(struct net_device *dev); /* * Network interface message level settings @@ -851,20 +819,7 @@ static inline int netif_rx_schedule_prep(struct net_device *dev) * already been called and returned 1. */ -static inline void __netif_rx_schedule(struct net_device *dev) -{ - unsigned long flags; - - local_irq_save(flags); - dev_hold(dev); - list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list); - if (dev->quota < 0) - dev->quota += dev->weight; - else - dev->quota = dev->weight; - __raise_softirq_irqoff(NET_RX_SOFTIRQ); - local_irq_restore(flags); -} +extern void __netif_rx_schedule(struct net_device *dev); /* Try to reschedule poll. Called by irq handler. */ -- cgit v1.2.3 From d53ace70052b5c0a08a4f92993c0614f84920abf Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 29 Mar 2006 09:42:43 +0100 Subject: [ARM] Allow un-muxed syscalls to be available for everyone It's been a while since the un-muxed socket and ipc syscalls were introduced, so make the unistd.h number definitions visible for non-EABI as well as EABI. 
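Illustration only, not part of this patch (the wrapper names below are hypothetical): with the numbers visible to OABI userspace as well, a caller can reach the socket calls directly by syscall number instead of bouncing through the sys_socketcall multiplexer:

#include <unistd.h>
#include <sys/syscall.h>
#include <linux/net.h>		/* SYS_SOCKET index used by the socketcall mux */

/* old path: everything funnels through one multiplexed syscall */
static long socket_via_socketcall(int domain, int type, int protocol)
{
	unsigned long args[3] = { domain, type, protocol };

	return syscall(__NR_socketcall, SYS_SOCKET, args);
}

/* new path: usable everywhere once __NR_socket is unconditionally defined */
static long socket_direct(int domain, int type, int protocol)
{
	return syscall(__NR_socket, domain, type, protocol);
}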
Signed-off-by: Russell King --- include/asm-arm/unistd.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include') diff --git a/include/asm-arm/unistd.h b/include/asm-arm/unistd.h index 8f331bbd39a8..65ac305c2d45 100644 --- a/include/asm-arm/unistd.h +++ b/include/asm-arm/unistd.h @@ -308,8 +308,6 @@ #define __NR_mq_notify (__NR_SYSCALL_BASE+278) #define __NR_mq_getsetattr (__NR_SYSCALL_BASE+279) #define __NR_waitid (__NR_SYSCALL_BASE+280) - -#if defined(__ARM_EABI__) /* reserve these for un-muxing socketcall */ #define __NR_socket (__NR_SYSCALL_BASE+281) #define __NR_bind (__NR_SYSCALL_BASE+282) #define __NR_connect (__NR_SYSCALL_BASE+283) @@ -327,9 +325,6 @@ #define __NR_getsockopt (__NR_SYSCALL_BASE+295) #define __NR_sendmsg (__NR_SYSCALL_BASE+296) #define __NR_recvmsg (__NR_SYSCALL_BASE+297) -#endif - -#if defined(__ARM_EABI__) /* reserve these for un-muxing ipc */ #define __NR_semop (__NR_SYSCALL_BASE+298) #define __NR_semget (__NR_SYSCALL_BASE+299) #define __NR_semctl (__NR_SYSCALL_BASE+300) @@ -341,16 +336,10 @@ #define __NR_shmdt (__NR_SYSCALL_BASE+306) #define __NR_shmget (__NR_SYSCALL_BASE+307) #define __NR_shmctl (__NR_SYSCALL_BASE+308) -#endif - #define __NR_add_key (__NR_SYSCALL_BASE+309) #define __NR_request_key (__NR_SYSCALL_BASE+310) #define __NR_keyctl (__NR_SYSCALL_BASE+311) - -#if defined(__ARM_EABI__) /* reserved for un-muxing ipc */ #define __NR_semtimedop (__NR_SYSCALL_BASE+312) -#endif - #define __NR_vserver (__NR_SYSCALL_BASE+313) #define __NR_ioprio_set (__NR_SYSCALL_BASE+314) #define __NR_ioprio_get (__NR_SYSCALL_BASE+315) -- cgit v1.2.3 From fd88dd740ad7b92cd399b6116dfa9486b36ffaff Mon Sep 17 00:00:00 2001 From: Marc-Andre Hebert Date: Thu, 30 Mar 2006 10:24:08 +0100 Subject: [ARM] 3434/1: pxa i2s amsl define Patch from Marc-Andre Hebert The error concerns a bit mask define for the AMSL bit of the SACR1 register in the 2.6 kernel tree. The AMSL is bit 0 and it was defined as so in the 2.4 kernel tree but it is inccorrectly set as bit 1 (a reserved bit) in the 2.6 kernel tree. Signed-off-by: Marc-Andre Hebert Signed-off-by: Russell King --- include/asm-arm/arch-pxa/pxa-regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-arm/arch-pxa/pxa-regs.h b/include/asm-arm/arch-pxa/pxa-regs.h index 1409c5bd703f..c8f53a71c076 100644 --- a/include/asm-arm/arch-pxa/pxa-regs.h +++ b/include/asm-arm/arch-pxa/pxa-regs.h @@ -485,7 +485,7 @@ #define SACR1_ENLBF (1 << 5) /* Enable Loopback */ #define SACR1_DRPL (1 << 4) /* Disable Replaying Function */ #define SACR1_DREC (1 << 3) /* Disable Recording Function */ -#define SACR1_AMSL (1 << 1) /* Specify Alternate Mode */ +#define SACR1_AMSL (1 << 0) /* Specify Alternate Mode */ #define SASR0_I2SOFF (1 << 7) /* Controller Status */ #define SASR0_ROR (1 << 6) /* Rx FIFO Overrun */ -- cgit v1.2.3 From cc3d48db75235adf0ae37d3287f6f9e14657d1ae Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Thu, 30 Mar 2006 10:51:44 +0100 Subject: [ARM] 3424/2: ixp23xx: fix uncompress.h for recent CRLF decompressor change Patch from Lennert Buytenhek Adapt ixp23xx uncompress.h to a081568d7016061ed848696984e3acf1ba0b3054. 
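For context (illustrative only, not part of this patch): after that change the string loop and CR/LF translation live in the common decompressor code, which only expects each machine to supply putc() and flush(). The shared helper looks roughly like the sketch below; the real code in arch/arm/boot/compressed/misc.c may differ in detail.

static void putstr(const char *ptr)
{
	char c;

	while ((c = *ptr++) != '\0') {
		if (c == '\n')
			putc('\r');	/* CRLF translation done once, centrally */
		putc(c);
	}

	flush();
}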
Signed-off-by: Lennert Buytenhek Signed-off-by: Russell King --- include/asm-arm/arch-ixp23xx/uncompress.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/asm-arm/arch-ixp23xx/uncompress.h b/include/asm-arm/arch-ixp23xx/uncompress.h index 62623fa9b2f7..013575e6a9a1 100644 --- a/include/asm-arm/arch-ixp23xx/uncompress.h +++ b/include/asm-arm/arch-ixp23xx/uncompress.h @@ -16,26 +16,21 @@ #define UART_BASE ((volatile u32 *)IXP23XX_UART1_PHYS) -static __inline__ void putc(char c) +static inline void putc(char c) { int j; for (j = 0; j < 0x1000; j++) { if (UART_BASE[UART_LSR] & UART_LSR_THRE) break; + barrier(); } UART_BASE[UART_TX] = c; } -static void putstr(const char *s) +static inline void flush(void) { - while (*s) { - putc(*s); - if (*s == '\n') - putc('\r'); - s++; - } } #define arch_decomp_setup() -- cgit v1.2.3 From 618a3c03fcfdf1ac4543247c8ddfb0c9d775ff33 Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Tue, 28 Mar 2006 16:40:04 -0800 Subject: IB/mad: RMPP support for additional classes Add RMPP support for additional management classes that support it. Also, validate RMPP is consistent with management class specified. Signed-off-by: Hal Rosenstock Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/mad.c | 54 ++++++++++++++++++++++++++++++++++---- drivers/infiniband/core/mad_rmpp.c | 19 +++----------- drivers/infiniband/core/user_mad.c | 30 +++++---------------- include/rdma/ib_mad.h | 27 ++++++++++++++++++- 4 files changed, 85 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index d4d07012a5ca..ba54c856b0e5 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -227,6 +227,14 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, if (!is_vendor_oui(mad_reg_req->oui)) goto error1; } + /* Make sure class supplied is consistent with RMPP */ + if (ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) { + if (!rmpp_version) + goto error1; + } else { + if (rmpp_version) + goto error1; + } /* Make sure class supplied is consistent with QP type */ if (qp_type == IB_QPT_SMI) { if ((mad_reg_req->mgmt_class != @@ -890,6 +898,35 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, } EXPORT_SYMBOL(ib_create_send_mad); +int ib_get_mad_data_offset(u8 mgmt_class) +{ + if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM) + return IB_MGMT_SA_HDR; + else if ((mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) || + (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) || + (mgmt_class == IB_MGMT_CLASS_BIS)) + return IB_MGMT_DEVICE_HDR; + else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && + (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) + return IB_MGMT_VENDOR_HDR; + else + return IB_MGMT_MAD_HDR; +} +EXPORT_SYMBOL(ib_get_mad_data_offset); + +int ib_is_mad_class_rmpp(u8 mgmt_class) +{ + if ((mgmt_class == IB_MGMT_CLASS_SUBN_ADM) || + (mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) || + (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) || + (mgmt_class == IB_MGMT_CLASS_BIS) || + ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && + (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END))) + return 1; + return 0; +} +EXPORT_SYMBOL(ib_is_mad_class_rmpp); + void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num) { struct ib_mad_send_wr_private *mad_send_wr; @@ -1022,6 +1059,13 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, goto error; } + if (!ib_is_mad_class_rmpp(((struct ib_mad_hdr *) 
send_buf->mad)->mgmt_class)) { + if (mad_agent_priv->agent.rmpp_version) { + ret = -EINVAL; + goto error; + } + } + /* * Save pointer to next work request to post in case the * current one completes, and the user modifies the work @@ -2454,11 +2498,11 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, } } sg_list.addr = dma_map_single(qp_info->port_priv-> - device->dma_device, - &mad_priv->grh, - sizeof *mad_priv - - sizeof mad_priv->header, - DMA_FROM_DEVICE); + device->dma_device, + &mad_priv->grh, + sizeof *mad_priv - + sizeof mad_priv->header, + DMA_FROM_DEVICE); pci_unmap_addr_set(&mad_priv->header, mapping, sg_list.addr); recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list; mad_priv->header.mad_list.mad_queue = recv_queue; diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index a6405079c285..dfd4e588ce03 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2005 Intel Inc. All rights reserved. - * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005-2006 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -100,17 +100,6 @@ void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent) } } -static int data_offset(u8 mgmt_class) -{ - if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM) - return IB_MGMT_SA_HDR; - else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && - (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) - return IB_MGMT_VENDOR_HDR; - else - return IB_MGMT_RMPP_HDR; -} - static void format_ack(struct ib_mad_send_buf *msg, struct ib_rmpp_mad *data, struct mad_rmpp_recv *rmpp_recv) @@ -137,7 +126,7 @@ static void ack_recv(struct mad_rmpp_recv *rmpp_recv, struct ib_mad_send_buf *msg; int ret, hdr_len; - hdr_len = data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class); + hdr_len = ib_get_mad_data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class); msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp, recv_wc->wc->pkey_index, 1, hdr_len, 0, GFP_KERNEL); @@ -163,7 +152,7 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent, if (IS_ERR(ah)) return (void *) ah; - hdr_len = data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class); + hdr_len = ib_get_mad_data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class); msg = ib_create_send_mad(agent, recv_wc->wc->src_qp, recv_wc->wc->pkey_index, 1, hdr_len, 0, GFP_KERNEL); @@ -408,7 +397,7 @@ static inline int get_mad_len(struct mad_rmpp_recv *rmpp_recv) rmpp_mad = (struct ib_rmpp_mad *)rmpp_recv->cur_seg_buf->mad; - hdr_size = data_offset(rmpp_mad->mad_hdr.mgmt_class); + hdr_size = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class); data_size = sizeof(struct ib_rmpp_mad) - hdr_size; pad = IB_MGMT_RMPP_DATA - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); if (pad > IB_MGMT_RMPP_DATA || pad < 0) diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index fb6cd42601f9..afe70a549c2f 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -177,17 +177,6 @@ static int queue_packet(struct ib_umad_file *file, return ret; } -static int data_offset(u8 mgmt_class) -{ - if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM) - return IB_MGMT_SA_HDR; - else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && - (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) - return IB_MGMT_VENDOR_HDR; 
- else - return IB_MGMT_RMPP_HDR; -} - static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *send_wc) { @@ -283,7 +272,7 @@ static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet, */ return -ENOSPC; } - offset = data_offset(recv_buf->mad->mad_hdr.mgmt_class); + offset = ib_get_mad_data_offset(recv_buf->mad->mad_hdr.mgmt_class); max_seg_payload = sizeof (struct ib_mad) - offset; for (left = packet->length - seg_payload, buf += seg_payload; @@ -441,21 +430,14 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, } rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data; - if (rmpp_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_ADM) { - hdr_len = IB_MGMT_SA_HDR; - copy_offset = IB_MGMT_RMPP_HDR; - rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & - IB_MGMT_RMPP_FLAG_ACTIVE; - } else if (rmpp_mad->mad_hdr.mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START && - rmpp_mad->mad_hdr.mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END) { - hdr_len = IB_MGMT_VENDOR_HDR; + hdr_len = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class); + if (!ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)) { + copy_offset = IB_MGMT_MAD_HDR; + rmpp_active = 0; + } else { copy_offset = IB_MGMT_RMPP_HDR; rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE; - } else { - hdr_len = IB_MGMT_MAD_HDR; - copy_offset = IB_MGMT_MAD_HDR; - rmpp_active = 0; } data_len = count - sizeof (struct ib_user_mad) - hdr_len; diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 51ab8eddb295..5ff77558013b 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -3,7 +3,7 @@ * Copyright (c) 2004 Infinicon Corporation. All rights reserved. * Copyright (c) 2004 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. - * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * Copyright (c) 2004-2006 Voltaire Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -55,6 +55,10 @@ #define IB_MGMT_CLASS_DEVICE_MGMT 0x06 #define IB_MGMT_CLASS_CM 0x07 #define IB_MGMT_CLASS_SNMP 0x08 +#define IB_MGMT_CLASS_DEVICE_ADM 0x10 +#define IB_MGMT_CLASS_BOOT_MGMT 0x11 +#define IB_MGMT_CLASS_BIS 0x12 +#define IB_MGMT_CLASS_CONG_MGMT 0x21 #define IB_MGMT_CLASS_VENDOR_RANGE2_START 0x30 #define IB_MGMT_CLASS_VENDOR_RANGE2_END 0x4F @@ -117,6 +121,8 @@ enum { IB_MGMT_VENDOR_DATA = 216, IB_MGMT_SA_HDR = 56, IB_MGMT_SA_DATA = 200, + IB_MGMT_DEVICE_HDR = 64, + IB_MGMT_DEVICE_DATA = 192, }; struct ib_mad_hdr { @@ -602,6 +608,25 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, int hdr_len, int data_len, gfp_t gfp_mask); +/** + * ib_is_mad_class_rmpp - returns whether given management class + * supports RMPP. + * @mgmt_class: management class + * + * This routine returns whether the management class supports RMPP. + */ +int ib_is_mad_class_rmpp(u8 mgmt_class); + +/** + * ib_get_mad_data_offset - returns the data offset for a given + * management class. + * @mgmt_class: management class + * + * This routine returns the data offset in the MAD for the management + * class requested. + */ +int ib_get_mad_data_offset(u8 mgmt_class); + /** * ib_get_rmpp_segment - returns the data buffer for a given RMPP segment. * @send_buf: Previously allocated send data buffer. 
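Taken together, the two new exports give MAD senders one place to ask both questions. A condensed sketch of the calling pattern, mirroring the user_mad.c hunk above (the helper name is hypothetical):

#include <rdma/ib_mad.h>

static void classify_send(struct ib_rmpp_mad *rmpp_mad, int *hdr_len,
			  int *copy_offset, int *rmpp_active)
{
	*hdr_len = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class);

	if (!ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)) {
		*copy_offset = IB_MGMT_MAD_HDR;
		*rmpp_active = 0;
	} else {
		*copy_offset = IB_MGMT_RMPP_HDR;
		*rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
			       IB_MGMT_RMPP_FLAG_ACTIVE;
	}
}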
-- cgit v1.2.3 From 3283a67d8618c9a292eced23e8753ab64adc6dba Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Thu, 30 Mar 2006 10:13:22 -0500 Subject: [IA64] Add __mca_table to the DISCARD list in gate.lds Add __mca_table to the DISCARD list for the gate.lds linker script to avoid broken linker references when linking the final vmlinux file. Also add comment to include/asm-ia64/asmmacros.h to avoid anyone else hitting this problem in the future. Credits to James Bottomley for spotting the DISCARD list in gate.lds.S Signed-off-by: Jes Sorensen Signed-off-by: Tony Luck --- arch/ia64/kernel/gate.lds.S | 1 + include/asm-ia64/asmmacro.h | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/arch/ia64/kernel/gate.lds.S b/arch/ia64/kernel/gate.lds.S index e1e4aba9ecd0..7c99e6ec3daf 100644 --- a/arch/ia64/kernel/gate.lds.S +++ b/arch/ia64/kernel/gate.lds.S @@ -59,6 +59,7 @@ SECTIONS *(.dynbss) *(.bss .bss.* .gnu.linkonce.b.*) *(__ex_table) + *(__mca_table) } } diff --git a/include/asm-ia64/asmmacro.h b/include/asm-ia64/asmmacro.h index d4cec32083d8..edf2cebb2969 100644 --- a/include/asm-ia64/asmmacro.h +++ b/include/asm-ia64/asmmacro.h @@ -38,6 +38,10 @@ name: /* * Helper macros for accessing user memory. + * + * When adding any new .section/.previous entries here, make sure to + * also add it to the DISCARD section in arch/ia64/kernel/gate.lds.S or + * unpleasant things will happen. */ .section "__ex_table", "a" // declare section & section attributes -- cgit v1.2.3 From a81dd18eb974cc34634c53a6447b2799ec0c3158 Mon Sep 17 00:00:00 2001 From: Thibaut VARENE Date: Fri, 3 Feb 2006 18:06:30 -0700 Subject: [PARISC] Clarify pdc_stable license terms pdc_stable.c is explicitly licensed under GPL version 2. Signed-off-by: Thibaut VARENE Signed-off-by: Kyle McMartin --- arch/parisc/kernel/pdc_chassis.c | 5 ++--- drivers/parisc/pdc_stable.c | 5 ++--- include/asm-parisc/pdc_chassis.h | 5 ++--- 3 files changed, 6 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/arch/parisc/kernel/pdc_chassis.c b/arch/parisc/kernel/pdc_chassis.c index 0cea6958f427..a45e2e2ffd9f 100644 --- a/arch/parisc/kernel/pdc_chassis.c +++ b/arch/parisc/kernel/pdc_chassis.c @@ -5,9 +5,8 @@ * Copyright (C) 2002-2004 Thibaut VARENE * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/drivers/parisc/pdc_stable.c b/drivers/parisc/pdc_stable.c index a28e17898fbd..4e53be9c03ab 100644 --- a/drivers/parisc/pdc_stable.c +++ b/drivers/parisc/pdc_stable.c @@ -4,9 +4,8 @@ * Copyright (C) 2005-2006 Thibaut VARENE * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/include/asm-parisc/pdc_chassis.h b/include/asm-parisc/pdc_chassis.h index adac9ac2743f..a609273dc6bf 100644 --- a/include/asm-parisc/pdc_chassis.h +++ b/include/asm-parisc/pdc_chassis.h @@ -6,9 +6,8 @@ * * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of -- cgit v1.2.3 From ab43227c8a568119a3aebc952a95ac3023e1730d Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 22 Mar 2006 08:28:59 -0700 Subject: [PARISC] Add parisc implementation of flush_anon_page() This should now allow SG_IO and fuse to function correctly on our platform. Signed-off-by: James Bottomley Signed-off-by: Kyle McMartin --- include/asm-parisc/cacheflush.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/asm-parisc/cacheflush.h b/include/asm-parisc/cacheflush.h index c53af9ff41b5..482be77551e3 100644 --- a/include/asm-parisc/cacheflush.h +++ b/include/asm-parisc/cacheflush.h @@ -184,6 +184,14 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long } +static inline void +flush_anon_page(struct page *page, unsigned long vmaddr) +{ + if (PageAnon(page)) + flush_user_dcache_page(vmaddr); +} +#define ARCH_HAS_FLUSH_ANON_PAGE + #ifdef CONFIG_DEBUG_RODATA void mark_rodata_ro(void); #endif -- cgit v1.2.3 From ba57583396585a1ca509e2a84d970a2ff3f9bbfb Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 22 Mar 2006 09:42:04 -0700 Subject: [PARISC] Add parisc implementation of flush_kernel_dcache_page() We need to do a little renaming of our original syntax because of the difference in arguments. 
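Usage sketch (illustrative, not from this patch; fill_page is a hypothetical caller): the struct-page interface is what generic code calls after writing to a page through its kernel mapping, while the _asm variant stays available for callers that already hold a kernel virtual address.

#include <linux/highmem.h>
#include <linux/string.h>
#include <asm/cacheflush.h>

static void fill_page(struct page *page, const void *src, size_t len)
{
	void *dst = kmap(page);		/* kernel-virtual view of the page */

	memcpy(dst, src, len);
	flush_kernel_dcache_page(page);	/* new API: takes the struct page */
	kunmap(page);
}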
Signed-off-by: James Bottomley Signed-off-by: Kyle McMartin --- arch/parisc/kernel/cache.c | 6 +++--- arch/parisc/kernel/pacache.S | 4 ++-- include/asm-parisc/cache.h | 2 +- include/asm-parisc/cacheflush.h | 9 ++++++++- include/asm-parisc/page.h | 2 +- 5 files changed, 15 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index d8a4ca021aac..360b7391cb8c 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -89,7 +89,7 @@ update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte) if (pfn_valid(page_to_pfn(page)) && page_mapping(page) && test_bit(PG_dcache_dirty, &page->flags)) { - flush_kernel_dcache_page(page_address(page)); + flush_kernel_dcache_page(page); clear_bit(PG_dcache_dirty, &page->flags); } } @@ -278,7 +278,7 @@ void flush_dcache_page(struct page *page) return; } - flush_kernel_dcache_page(page_address(page)); + flush_kernel_dcache_page(page); if (!mapping) return; @@ -317,7 +317,7 @@ EXPORT_SYMBOL(flush_dcache_page); /* Defined in arch/parisc/kernel/pacache.S */ EXPORT_SYMBOL(flush_kernel_dcache_range_asm); -EXPORT_SYMBOL(flush_kernel_dcache_page); +EXPORT_SYMBOL(flush_kernel_dcache_page_asm); EXPORT_SYMBOL(flush_data_cache_local); EXPORT_SYMBOL(flush_kernel_icache_range_asm); diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S index 9534ee17b9be..7a4f07e8d3c3 100644 --- a/arch/parisc/kernel/pacache.S +++ b/arch/parisc/kernel/pacache.S @@ -621,9 +621,9 @@ __clear_user_page_asm: .procend - .export flush_kernel_dcache_page + .export flush_kernel_dcache_page_asm -flush_kernel_dcache_page: +flush_kernel_dcache_page_asm: .proc .callinfo NO_CALLS .entry diff --git a/include/asm-parisc/cache.h b/include/asm-parisc/cache.h index ae50f8e12eed..c831665473cb 100644 --- a/include/asm-parisc/cache.h +++ b/include/asm-parisc/cache.h @@ -48,7 +48,7 @@ extern void flush_user_icache_range_asm(unsigned long, unsigned long); extern void flush_kernel_icache_range_asm(unsigned long, unsigned long); extern void flush_user_dcache_range_asm(unsigned long, unsigned long); extern void flush_kernel_dcache_range_asm(unsigned long, unsigned long); -extern void flush_kernel_dcache_page(void *); +extern void flush_kernel_dcache_page_asm(void *); extern void flush_kernel_icache_page(void *); extern void disable_sr_hashing(void); /* turns off space register hashing */ extern void disable_sr_hashing_asm(int); /* low level support for above */ diff --git a/include/asm-parisc/cacheflush.h b/include/asm-parisc/cacheflush.h index 482be77551e3..76b6b7d6046a 100644 --- a/include/asm-parisc/cacheflush.h +++ b/include/asm-parisc/cacheflush.h @@ -62,7 +62,7 @@ extern void flush_dcache_page(struct page *page); #define flush_dcache_mmap_unlock(mapping) \ write_unlock_irq(&(mapping)->tree_lock) -#define flush_icache_page(vma,page) do { flush_kernel_dcache_page(page_address(page)); flush_kernel_icache_page(page_address(page)); } while (0) +#define flush_icache_page(vma,page) do { flush_kernel_dcache_page(page); flush_kernel_icache_page(page_address(page)); } while (0) #define flush_icache_range(s,e) do { flush_kernel_dcache_range_asm(s,e); flush_kernel_icache_range_asm(s,e); } while (0) @@ -192,6 +192,13 @@ flush_anon_page(struct page *page, unsigned long vmaddr) } #define ARCH_HAS_FLUSH_ANON_PAGE +static inline void +flush_kernel_dcache_page(struct page *page) +{ + flush_kernel_dcache_page_asm(page_address(page)); +} +#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE + #ifdef CONFIG_DEBUG_RODATA 
void mark_rodata_ro(void); #endif diff --git a/include/asm-parisc/page.h b/include/asm-parisc/page.h index 9f303c0c3cd7..c6c9d5793070 100644 --- a/include/asm-parisc/page.h +++ b/include/asm-parisc/page.h @@ -26,7 +26,7 @@ static inline void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, struct page *pg) { copy_user_page_asm(vto, vfrom); - flush_kernel_dcache_page(vto); + flush_kernel_dcache_page_asm(vto); /* XXX: ppc flushes icache too, should we? */ } -- cgit v1.2.3 From b31059f7634931a06f6811247ae9217d1a833a46 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 23 Mar 2006 15:48:15 -0700 Subject: [PARISC] Add STRICT_MM_TYPECHECKS Add STRICT_MM_TYPECHECKS to page.h as other architectures do. Signed-off-by: Helge Deller Signed-off-by: Kyle McMartin --- include/asm-parisc/page.h | 56 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/asm-parisc/page.h b/include/asm-parisc/page.h index c6c9d5793070..45e02aa5bf4b 100644 --- a/include/asm-parisc/page.h +++ b/include/asm-parisc/page.h @@ -40,14 +40,19 @@ clear_user_page(void *page, unsigned long vaddr, struct page *pg) /* * These are used to make use of C type-checking.. */ -#ifdef __LP64__ -typedef struct { unsigned long pte; } pte_t; -#else -typedef struct { - unsigned long pte; - unsigned long flags; -} pte_t; +#define STRICT_MM_TYPECHECKS +#ifdef STRICT_MM_TYPECHECKS +typedef struct { unsigned long pte; +#if !defined(CONFIG_64BIT) + unsigned long future_flags; + /* XXX: it's possible to remove future_flags and change BITS_PER_PTE_ENTRY + to 2, but then strangely the identical 32bit kernel boots on a + c3000(pa20), but not any longer on a 715(pa11). + Still investigating... HelgeD. + */ #endif +} pte_t; /* either 32 or 64bit */ + /* NOTE: even on 64 bits, these entries are __u32 because we allocate * the pmd and pgd in ZONE_DMA (i.e. under 4GB) */ typedef struct { __u32 pmd; } pmd_t; @@ -55,25 +60,44 @@ typedef struct { __u32 pgd; } pgd_t; typedef struct { unsigned long pgprot; } pgprot_t; #define pte_val(x) ((x).pte) -#ifdef __LP64__ -#define pte_flags(x) (*(__u32 *)&((x).pte)) -#else -#define pte_flags(x) ((x).flags) -#endif - /* These do not work lvalues, so make sure we don't use them as such. */ #define pmd_val(x) ((x).pmd + 0) #define pgd_val(x) ((x).pgd + 0) #define pgprot_val(x) ((x).pgprot) -#define __pmd_val_set(x,n) (x).pmd = (n) -#define __pgd_val_set(x,n) (x).pgd = (n) - #define __pte(x) ((pte_t) { (x) } ) #define __pmd(x) ((pmd_t) { (x) } ) #define __pgd(x) ((pgd_t) { (x) } ) #define __pgprot(x) ((pgprot_t) { (x) } ) +#define __pmd_val_set(x,n) (x).pmd = (n) +#define __pgd_val_set(x,n) (x).pgd = (n) + +#else +/* + * .. 
while these make it easier on the compiler + */ +typedef unsigned long pte_t; +typedef __u32 pmd_t; +typedef __u32 pgd_t; +typedef unsigned long pgprot_t; + +#define pte_val(x) (x) +#define pmd_val(x) (x) +#define pgd_val(x) (x) +#define pgprot_val(x) (x) + +#define __pte(x) (x) +#define __pmd(x) (x) +#define __pgd(x) (x) +#define __pgprot(x) (x) + +#define __pmd_val_set(x,n) (x) = (n) +#define __pgd_val_set(x,n) (x) = (n) + +#endif /* STRICT_MM_TYPECHECKS */ + + typedef struct __physmem_range { unsigned long start_pfn; unsigned long pages; /* PAGE_SIZE pages */ -- cgit v1.2.3 From b8ce0aadcdebbaf5ec013c57e2a0803060817bcc Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 7 Mar 2006 14:12:13 -0700 Subject: [PARISC] Add CONFIG_HPPA_IOREMAP to conditionally enable ioremap Instead of making it a #define in asm/io.h, allow user to select to turn on IOREMAP from the config menu. Signed-off-by: Helge Deller Signed-off-by: Kyle McMartin --- arch/parisc/Kconfig | 11 +++++++++++ arch/parisc/mm/ioremap.c | 13 ++++++------- include/asm-parisc/io.h | 16 +++++----------- 3 files changed, 22 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 6b3c50964ca9..fc5755d1db57 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -138,6 +138,17 @@ config 64BIT enable this option otherwise. The 64bit kernel is significantly bigger and slower than the 32bit one. +config HPPA_IOREMAP + bool "Enable IOREMAP functionality (EXPERIMENTAL)" + depends on EXPERIMENTAL + default n + help + Enable this option if you want to enable real IOREMAPPING on PA-RISC. + Currently we just "simulate" I/O remapping, and enabling this option + will just crash your machine. + + Say N here, unless you are a real PA-RISC Linux kernel hacker. 
+ config SMP bool "Symmetric multi-processing support" ---help--- diff --git a/arch/parisc/mm/ioremap.c b/arch/parisc/mm/ioremap.c index edd9a9559cba..028772144191 100644 --- a/arch/parisc/mm/ioremap.c +++ b/arch/parisc/mm/ioremap.c @@ -72,7 +72,7 @@ remap_area_pmd(pmd_t *pmd, unsigned long address, unsigned long size, return 0; } -#if USE_HPPA_IOREMAP +#ifdef CONFIG_HPPA_IOREMAP static int remap_area_pages(unsigned long address, unsigned long phys_addr, unsigned long size, unsigned long flags) @@ -114,7 +114,7 @@ remap_area_pages(unsigned long address, unsigned long phys_addr, return error; } -#endif /* USE_HPPA_IOREMAP */ +#endif /* CONFIG_HPPA_IOREMAP */ #ifdef CONFIG_DEBUG_IOREMAP static unsigned long last = 0; @@ -154,8 +154,7 @@ EXPORT_SYMBOL(__raw_bad_addr); */ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags) { -#if !(USE_HPPA_IOREMAP) - +#if !defined(CONFIG_HPPA_IOREMAP) unsigned long end = phys_addr + size - 1; /* Support EISA addresses */ if ((phys_addr >= 0x00080000 && end < 0x000fffff) @@ -222,10 +221,10 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l void iounmap(void __iomem *addr) { -#if !(USE_HPPA_IOREMAP) - return; -#else +#ifdef CONFIG_HPPA_IOREMAP if (addr > high_memory) return vfree((void *) (PAGE_MASK & (unsigned long __force) addr)); +#else + return; #endif } diff --git a/include/asm-parisc/io.h b/include/asm-parisc/io.h index be0c7234a6da..2fb253255102 100644 --- a/include/asm-parisc/io.h +++ b/include/asm-parisc/io.h @@ -26,11 +26,7 @@ extern unsigned long parisc_vmerge_max_size; */ #ifdef CONFIG_DEBUG_IOREMAP -#ifdef CONFIG_64BIT -#define NYBBLE_SHIFT 60 -#else -#define NYBBLE_SHIFT 28 -#endif +#define NYBBLE_SHIFT (BITS_PER_LONG - 4) extern void gsc_bad_addr(unsigned long addr); extern void __raw_bad_addr(const volatile void __iomem *addr); #define gsc_check_addr(addr) \ @@ -181,13 +177,11 @@ extern inline void * ioremap_nocache(unsigned long offset, unsigned long size) extern void iounmap(void __iomem *addr); /* - * USE_HPPA_IOREMAP is the magic flag to enable or disable real ioremap() + * CONFIG_HPPA_IOREMAP is the magic flag to enable or disable real ioremap() * functionality. It's currently disabled because it may not work on some * machines. */ -#define USE_HPPA_IOREMAP 0 - -#if USE_HPPA_IOREMAP +#ifdef CONFIG_HPPA_IOREMAP static inline unsigned char __raw_readb(const volatile void __iomem *addr) { return (*(volatile unsigned char __force *) (addr)); @@ -221,7 +215,7 @@ static inline void __raw_writeq(unsigned long long b, volatile void __iomem *add { *(volatile unsigned long long __force *) addr = b; } -#else /* !USE_HPPA_IOREMAP */ +#else /* !CONFIG_HPPA_IOREMAP */ static inline unsigned char __raw_readb(const volatile void __iomem *addr) { __raw_check_addr(addr); @@ -271,7 +265,7 @@ static inline void __raw_writeq(unsigned long long b, volatile void __iomem *add gsc_writeq(b, (unsigned long) addr); } -#endif /* !USE_HPPA_IOREMAP */ +#endif /* !CONFIG_HPPA_IOREMAP */ /* readb can never be const, so use __fswab instead of le*_to_cpu */ #define readb(addr) __raw_readb(addr) -- cgit v1.2.3 From 29ef8295327653ff09a56285c35213cd31fa54b3 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 23 Mar 2006 00:32:46 -0700 Subject: [PARISC] Enable ioremap functionality unconditionally Enable CONFIG_HPPA_IOREMAP by default and remove all now unnecessary code. 
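With real remapping always enabled, the usual driver idiom works unchanged on parisc. A minimal sketch (demo_probe, the register offsets and the address/length arguments are made up for illustration):

#include <asm/io.h>

#define DEMO_CTRL	0x00	/* hypothetical register offsets */
#define DEMO_STATUS	0x04

static int demo_probe(unsigned long phys, unsigned long len)
{
	void __iomem *regs = ioremap(phys, len);

	if (!regs)
		return -ENOMEM;

	__raw_writel(1, regs + DEMO_CTRL);
	(void) __raw_readl(regs + DEMO_STATUS);

	iounmap(regs);
	return 0;
}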
Signed-off-by: Helge Deller Signed-off-by: Kyle McMartin --- arch/parisc/Kconfig | 11 --------- arch/parisc/mm/ioremap.c | 16 ++----------- include/asm-parisc/io.h | 58 +----------------------------------------------- 3 files changed, 3 insertions(+), 82 deletions(-) (limited to 'include') diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index fc5755d1db57..6b3c50964ca9 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -138,17 +138,6 @@ config 64BIT enable this option otherwise. The 64bit kernel is significantly bigger and slower than the 32bit one. -config HPPA_IOREMAP - bool "Enable IOREMAP functionality (EXPERIMENTAL)" - depends on EXPERIMENTAL - default n - help - Enable this option if you want to enable real IOREMAPPING on PA-RISC. - Currently we just "simulate" I/O remapping, and enabling this option - will just crash your machine. - - Say N here, unless you are a real PA-RISC Linux kernel hacker. - config SMP bool "Symmetric multi-processing support" ---help--- diff --git a/arch/parisc/mm/ioremap.c b/arch/parisc/mm/ioremap.c index 028772144191..5a22e1cab217 100644 --- a/arch/parisc/mm/ioremap.c +++ b/arch/parisc/mm/ioremap.c @@ -72,7 +72,6 @@ remap_area_pmd(pmd_t *pmd, unsigned long address, unsigned long size, return 0; } -#ifdef CONFIG_HPPA_IOREMAP static int remap_area_pages(unsigned long address, unsigned long phys_addr, unsigned long size, unsigned long flags) @@ -114,7 +113,6 @@ remap_area_pages(unsigned long address, unsigned long phys_addr, return error; } -#endif /* CONFIG_HPPA_IOREMAP */ #ifdef CONFIG_DEBUG_IOREMAP static unsigned long last = 0; @@ -154,21 +152,16 @@ EXPORT_SYMBOL(__raw_bad_addr); */ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags) { -#if !defined(CONFIG_HPPA_IOREMAP) +#ifdef CONFIG_EISA + #error FIXME. unsigned long end = phys_addr + size - 1; /* Support EISA addresses */ if ((phys_addr >= 0x00080000 && end < 0x000fffff) || (phys_addr >= 0x00500000 && end < 0x03bfffff)) { phys_addr |= 0xfc000000; } - -#ifdef CONFIG_DEBUG_IOREMAP - return (void __iomem *)(phys_addr - (0x1UL << NYBBLE_SHIFT)); -#else - return (void __iomem *)phys_addr; #endif -#else void *addr; struct vm_struct *area; unsigned long offset, last_addr; @@ -216,15 +209,10 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l } return (void __iomem *) (offset + (char *)addr); -#endif } void iounmap(void __iomem *addr) { -#ifdef CONFIG_HPPA_IOREMAP if (addr > high_memory) return vfree((void *) (PAGE_MASK & (unsigned long __force) addr)); -#else - return; -#endif } diff --git a/include/asm-parisc/io.h b/include/asm-parisc/io.h index 2fb253255102..bcfe0b09c93f 100644 --- a/include/asm-parisc/io.h +++ b/include/asm-parisc/io.h @@ -176,12 +176,7 @@ extern inline void * ioremap_nocache(unsigned long offset, unsigned long size) extern void iounmap(void __iomem *addr); -/* - * CONFIG_HPPA_IOREMAP is the magic flag to enable or disable real ioremap() - * functionality. It's currently disabled because it may not work on some - * machines. 
- */ -#ifdef CONFIG_HPPA_IOREMAP + static inline unsigned char __raw_readb(const volatile void __iomem *addr) { return (*(volatile unsigned char __force *) (addr)); @@ -215,57 +210,6 @@ static inline void __raw_writeq(unsigned long long b, volatile void __iomem *add { *(volatile unsigned long long __force *) addr = b; } -#else /* !CONFIG_HPPA_IOREMAP */ -static inline unsigned char __raw_readb(const volatile void __iomem *addr) -{ - __raw_check_addr(addr); - - return gsc_readb((unsigned long) addr); -} -static inline unsigned short __raw_readw(const volatile void __iomem *addr) -{ - __raw_check_addr(addr); - - return gsc_readw((unsigned long) addr); -} -static inline unsigned int __raw_readl(const volatile void __iomem *addr) -{ - __raw_check_addr(addr); - - return gsc_readl((unsigned long) addr); -} -static inline unsigned long long __raw_readq(const volatile void __iomem *addr) -{ - __raw_check_addr(addr); - - return gsc_readq((unsigned long) addr); -} - -static inline void __raw_writeb(unsigned char b, volatile void __iomem *addr) -{ - __raw_check_addr(addr); - - gsc_writeb(b, (unsigned long) addr); -} -static inline void __raw_writew(unsigned short b, volatile void __iomem *addr) -{ - __raw_check_addr(addr); - - gsc_writew(b, (unsigned long) addr); -} -static inline void __raw_writel(unsigned int b, volatile void __iomem *addr) -{ - __raw_check_addr(addr); - - gsc_writel(b, (unsigned long) addr); -} -static inline void __raw_writeq(unsigned long long b, volatile void __iomem *addr) -{ - __raw_check_addr(addr); - - gsc_writeq(b, (unsigned long) addr); -} -#endif /* !CONFIG_HPPA_IOREMAP */ /* readb can never be const, so use __fswab instead of le*_to_cpu */ #define readb(addr) __raw_readb(addr) -- cgit v1.2.3 From a41d3862dfd44a1b09a0f6243bb34773061fd9a2 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 23 Mar 2006 01:07:00 -0700 Subject: [PARISC] Remove obsolete CONFIG_DEBUG_IOREMAP Remove CONFIG_DEBUG_IOREMAP, it's now obsolete and won't work anyway. Remove it from lib/KConfig since it was only available on parisc. 
Signed-off-by: Helge Deller Signed-off-by: Kyle McMartin --- arch/parisc/lib/iomap.c | 4 ---- arch/parisc/mm/ioremap.c | 24 ------------------------ include/asm-parisc/io.h | 33 --------------------------------- lib/Kconfig.debug | 13 ------------- 4 files changed, 74 deletions(-) (limited to 'include') diff --git a/arch/parisc/lib/iomap.c b/arch/parisc/lib/iomap.c index 01bec8fcbd0d..f4a811690ab3 100644 --- a/arch/parisc/lib/iomap.c +++ b/arch/parisc/lib/iomap.c @@ -263,11 +263,7 @@ static const struct iomap_ops iomem_ops = { const struct iomap_ops *iomap_ops[8] = { [0] = &ioport_ops, -#ifdef CONFIG_DEBUG_IOREMAP - [6] = &iomem_ops, -#else [7] = &iomem_ops -#endif }; diff --git a/arch/parisc/mm/ioremap.c b/arch/parisc/mm/ioremap.c index 21a16747e56e..a5967b7d3726 100644 --- a/arch/parisc/mm/ioremap.c +++ b/arch/parisc/mm/ioremap.c @@ -114,30 +114,6 @@ remap_area_pages(unsigned long address, unsigned long phys_addr, return error; } -#ifdef CONFIG_DEBUG_IOREMAP -static unsigned long last = 0; - -void gsc_bad_addr(unsigned long addr) -{ - if (time_after(jiffies, last + HZ*10)) { - printk("gsc_foo() called with bad address 0x%lx\n", addr); - dump_stack(); - last = jiffies; - } -} -EXPORT_SYMBOL(gsc_bad_addr); - -void __raw_bad_addr(const volatile void __iomem *addr) -{ - if (time_after(jiffies, last + HZ*10)) { - printk("__raw_foo() called with bad address 0x%p\n", addr); - dump_stack(); - last = jiffies; - } -} -EXPORT_SYMBOL(__raw_bad_addr); -#endif - /* * Generic mapping function (not visible outside): */ diff --git a/include/asm-parisc/io.h b/include/asm-parisc/io.h index bcfe0b09c93f..29da31194b91 100644 --- a/include/asm-parisc/io.h +++ b/include/asm-parisc/io.h @@ -25,31 +25,11 @@ extern unsigned long parisc_vmerge_max_size; * eg dev->hpa or 0xfee00000. 
*/ -#ifdef CONFIG_DEBUG_IOREMAP -#define NYBBLE_SHIFT (BITS_PER_LONG - 4) -extern void gsc_bad_addr(unsigned long addr); -extern void __raw_bad_addr(const volatile void __iomem *addr); -#define gsc_check_addr(addr) \ - if ((addr >> NYBBLE_SHIFT) != 0xf) { \ - gsc_bad_addr(addr); \ - addr |= 0xfUL << NYBBLE_SHIFT; \ - } -#define __raw_check_addr(addr) \ - if (((unsigned long)addr >> NYBBLE_SHIFT) != 0xe) \ - __raw_bad_addr(addr); \ - addr = (void __iomem *)((unsigned long)addr | (0xfUL << NYBBLE_SHIFT)); -#else -#define gsc_check_addr(addr) -#define __raw_check_addr(addr) -#endif - static inline unsigned char gsc_readb(unsigned long addr) { long flags; unsigned char ret; - gsc_check_addr(addr); - __asm__ __volatile__( " rsm 2,%0\n" " ldbx 0(%2),%1\n" @@ -64,8 +44,6 @@ static inline unsigned short gsc_readw(unsigned long addr) long flags; unsigned short ret; - gsc_check_addr(addr); - __asm__ __volatile__( " rsm 2,%0\n" " ldhx 0(%2),%1\n" @@ -79,8 +57,6 @@ static inline unsigned int gsc_readl(unsigned long addr) { u32 ret; - gsc_check_addr(addr); - __asm__ __volatile__( " ldwax 0(%1),%0\n" : "=r" (ret) : "r" (addr) ); @@ -91,7 +67,6 @@ static inline unsigned int gsc_readl(unsigned long addr) static inline unsigned long long gsc_readq(unsigned long addr) { unsigned long long ret; - gsc_check_addr(addr); #ifdef __LP64__ __asm__ __volatile__( @@ -108,8 +83,6 @@ static inline unsigned long long gsc_readq(unsigned long addr) static inline void gsc_writeb(unsigned char val, unsigned long addr) { long flags; - gsc_check_addr(addr); - __asm__ __volatile__( " rsm 2,%0\n" " stbs %1,0(%2)\n" @@ -120,8 +93,6 @@ static inline void gsc_writeb(unsigned char val, unsigned long addr) static inline void gsc_writew(unsigned short val, unsigned long addr) { long flags; - gsc_check_addr(addr); - __asm__ __volatile__( " rsm 2,%0\n" " sths %1,0(%2)\n" @@ -131,8 +102,6 @@ static inline void gsc_writew(unsigned short val, unsigned long addr) static inline void gsc_writel(unsigned int val, unsigned long addr) { - gsc_check_addr(addr); - __asm__ __volatile__( " stwas %0,0(%1)\n" : : "r" (val), "r" (addr) ); @@ -140,8 +109,6 @@ static inline void gsc_writel(unsigned int val, unsigned long addr) static inline void gsc_writeq(unsigned long long val, unsigned long addr) { - gsc_check_addr(addr); - #ifdef __LP64__ __asm__ __volatile__( " stda %0,0(%1)\n" diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 6e8a60f67c7a..d57fd9181b18 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -157,19 +157,6 @@ config DEBUG_INFO If unsure, say N. -config DEBUG_IOREMAP - bool "Enable ioremap() debugging" - depends on DEBUG_KERNEL && PARISC - help - Enabling this option will cause the kernel to distinguish between - ioremapped and physical addresses. It will print a backtrace (at - most one every 10 seconds), hopefully allowing you to see which - drivers need work. Fixing all these problems is a prerequisite - for turning on USE_HPPA_IOREMAP. The warnings are harmless; - the kernel has enough information to fix the broken drivers - automatically, but we'd like to make it more efficient by not - having to do that. - config DEBUG_FS bool "Debug Filesystem" depends on SYSFS -- cgit v1.2.3 From 10267cdd0c2dee46a3f59d93fbfac7229d416dba Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 26 Mar 2006 01:54:16 -0700 Subject: [PARISC] Fixup CONFIG_EISA a bit Fix up some ISA/EISA stuff. 
(Note: isa_ accessors have been removed from asm/io.h) Signed-off-by: Helge Deller Signed-off-by: Kyle McMartin --- arch/parisc/mm/ioremap.c | 8 +++----- include/asm-parisc/pci.h | 5 +++++ 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/parisc/mm/ioremap.c b/arch/parisc/mm/ioremap.c index a5967b7d3726..5067ea90d91a 100644 --- a/arch/parisc/mm/ioremap.c +++ b/arch/parisc/mm/ioremap.c @@ -135,11 +135,9 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l #ifdef CONFIG_EISA unsigned long end = phys_addr + size - 1; /* Support EISA addresses */ - if ((phys_addr >= 0x00080000 && end < 0x000fffff) - || (phys_addr >= 0x00500000 && end < 0x03bfffff)) { - phys_addr |= 0xfc000000; -#warning "FIXME: EISA regions do not work yet..." - return NULL; /* XXX */ + if ((phys_addr >= 0x00080000 && end < 0x000fffff) || + (phys_addr >= 0x00500000 && end < 0x03bfffff)) { + phys_addr |= F_EXTEND(0xfc000000); } #endif diff --git a/include/asm-parisc/pci.h b/include/asm-parisc/pci.h index fe7f6a2f5aa7..77bbafb7f73e 100644 --- a/include/asm-parisc/pci.h +++ b/include/asm-parisc/pci.h @@ -289,4 +289,9 @@ static inline void pcibios_add_platform_entries(struct pci_dev *dev) { } +static inline void pcibios_penalize_isa_irq(int irq, int active) +{ + /* We don't need to penalize isa irq's */ +} + #endif /* __ASM_PARISC_PCI_H */ -- cgit v1.2.3 From bc8846c522264d2522b0082321ec8c2051a4536f Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Fri, 24 Mar 2006 21:22:02 -0700 Subject: [PARISC] More useful readwrite lock helpers spinlock.c needs _can_lock helpers. Rewrite _is_locked helpers to be _can_lock helpers. Signed-off-by: Kyle McMartin --- include/asm-parisc/spinlock.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-parisc/spinlock.h b/include/asm-parisc/spinlock.h index 16c2ac075fc5..a93960e232cf 100644 --- a/include/asm-parisc/spinlock.h +++ b/include/asm-parisc/spinlock.h @@ -134,14 +134,22 @@ static __inline__ int __raw_write_trylock(raw_rwlock_t *rw) return 1; } -static __inline__ int __raw_is_read_locked(raw_rwlock_t *rw) +/* + * read_can_lock - would read_trylock() succeed? + * @lock: the rwlock in question. + */ +static __inline__ int __raw_read_can_lock(raw_rwlock_t *rw) { - return rw->counter > 0; + return rw->counter >= 0; } -static __inline__ int __raw_is_write_locked(raw_rwlock_t *rw) +/* + * write_can_lock - would write_trylock() succeed? + * @lock: the rwlock in question. 
+ */ +static __inline__ int __raw_write_can_lock(raw_rwlock_t *rw) { - return rw->counter < 0; + return !rw->counter; } #endif /* __ASM_SPINLOCK_H */ -- cgit v1.2.3 From 50a34dbd612925f2ec55b1781632835ef36b97d5 Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Fri, 24 Mar 2006 21:24:21 -0700 Subject: [PARISC] Add PREEMPT support Signed-off-by: Kyle McMartin --- arch/parisc/Kconfig | 6 +----- arch/parisc/kernel/entry.S | 39 ++++++++++++++++++++++++++++++++++++--- include/asm-parisc/thread_info.h | 3 ++- 3 files changed, 39 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 6b3c50964ca9..2fdf21989dc2 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -177,14 +177,10 @@ config ARCH_DISCONTIGMEM_DEFAULT def_bool y depends on ARCH_DISCONTIGMEM_ENABLE +source "kernel/Kconfig.preempt" source "kernel/Kconfig.hz" source "mm/Kconfig" -config PREEMPT - bool -# bool "Preemptible Kernel" - default n - config COMPAT def_bool y depends on 64BIT diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 6d17c0a3431b..7c95d7663c29 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -1014,14 +1014,21 @@ intr_restore: nop nop +#ifndef CONFIG_PREEMPT +# define intr_do_preempt intr_restore +#endif /* !CONFIG_PREEMPT */ + .import schedule,code intr_do_resched: - /* Only do reschedule if we are returning to user space */ + /* Only call schedule on return to userspace. If we're returning + * to kernel space, we may schedule if CONFIG_PREEMPT, otherwise + * we jump back to intr_restore. + */ LDREG PT_IASQ0(%r16), %r20 - CMPIB= 0,%r20,intr_restore /* backward */ + CMPIB= 0, %r20, intr_do_preempt nop LDREG PT_IASQ1(%r16), %r20 - CMPIB= 0,%r20,intr_restore /* backward */ + CMPIB= 0, %r20, intr_do_preempt nop #ifdef CONFIG_64BIT @@ -1037,6 +1044,32 @@ intr_do_resched: #endif ldo R%intr_check_sig(%r2), %r2 + /* preempt the current task on returning to kernel + * mode from an interrupt, iff need_resched is set, + * and preempt_count is 0. otherwise, we continue on + * our merry way back to the current running task. + */ +#ifdef CONFIG_PREEMPT + .import preempt_schedule_irq,code +intr_do_preempt: + rsm PSW_SM_I, %r0 /* disable interrupts */ + + /* current_thread_info()->preempt_count */ + mfctl %cr30, %r1 + LDREG TI_PRE_COUNT(%r1), %r19 + CMPIB<> 0, %r19, intr_restore /* if preempt_count > 0 */ + nop /* prev insn branched backwards */ + + /* check if we interrupted a critical path */ + LDREG PT_PSW(%r16), %r20 + bb,<,n %r20, 31 - PSW_SM_I, intr_restore + nop + + BL preempt_schedule_irq, %r2 + nop + + b intr_restore /* ssm PSW_SM_I done by intr_restore */ +#endif /* CONFIG_PREEMPT */ .import do_signal,code intr_do_signal: diff --git a/include/asm-parisc/thread_info.h b/include/asm-parisc/thread_info.h index ac32f140b83a..f2f83b04cd8b 100644 --- a/include/asm-parisc/thread_info.h +++ b/include/asm-parisc/thread_info.h @@ -49,7 +49,8 @@ struct thread_info { #endif /* !__ASSEMBLY */ -#define PREEMPT_ACTIVE 0x10000000 +#define PREEMPT_ACTIVE_BIT 28 +#define PREEMPT_ACTIVE (1 << PREEMPT_ACTIVE_BIT) /* * thread information flags -- cgit v1.2.3 From 2746ae14fe55f9483ae94ef89d4495d0eb8ee03d Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Wed, 29 Mar 2006 07:04:49 -0700 Subject: [PARISC] Make local_t use atomic_long_t As done in asm-generic/local.h in mainline. Otherwise local_t was 32-bit even on a 64-bit kernel. 
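The width matters for the kind of per-cpu statistics counter local_t is meant for; a minimal sketch (the counter name and accounting function are hypothetical) where a 32-bit counter on a 64-bit kernel would silently wrap:

#include <linux/percpu.h>
#include <asm/local.h>

static DEFINE_PER_CPU(local_t, bytes_handled);

static void account_bytes(unsigned long nbytes)
{
	local_add(nbytes, &get_cpu_var(bytes_handled));
	put_cpu_var(bytes_handled);
}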
Signed-off-by: Kyle McMartin --- include/asm-parisc/local.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/asm-parisc/local.h b/include/asm-parisc/local.h index 892b3b2c4962..d0f550912755 100644 --- a/include/asm-parisc/local.h +++ b/include/asm-parisc/local.h @@ -4,16 +4,16 @@ #include #include -typedef atomic_t local_t; +typedef atomic_long_t local_t; -#define LOCAL_INIT(i) ATOMIC_INIT(i) -#define local_read(v) atomic_read(v) -#define local_set(v,i) atomic_set(v,i) +#define LOCAL_INIT(i) ATOMIC_LONG_INIT(i) +#define local_read(v) atomic_long_read(v) +#define local_set(v,i) atomic_long_set(v,i) -#define local_inc(v) atomic_inc(v) -#define local_dec(v) atomic_dec(v) -#define local_add(i, v) atomic_add(i, v) -#define local_sub(i, v) atomic_sub(i, v) +#define local_inc(v) atomic_long_inc(v) +#define local_dec(v) atomic_long_dec(v) +#define local_add(i, v) atomic_long_add(i, v) +#define local_sub(i, v) atomic_long_sub(i, v) #define __local_inc(v) ((v)->counter++) #define __local_dec(v) ((v)->counter--) -- cgit v1.2.3 From 4da9f131a74d12de56c44da6d522a9116da06805 Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Wed, 29 Mar 2006 19:47:32 -0500 Subject: [PARISC] Add atomic_sub_and_test Define atomic_sub_and_test to fix build failures. Signed-off-by: Kyle McMartin --- include/asm-parisc/atomic.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/asm-parisc/atomic.h b/include/asm-parisc/atomic.h index 4dc7253ff5d0..403ea97316cf 100644 --- a/include/asm-parisc/atomic.h +++ b/include/asm-parisc/atomic.h @@ -210,6 +210,8 @@ static __inline__ int atomic_read(const atomic_t *v) #define atomic_dec_and_test(v) (atomic_dec_return(v) == 0) +#define atomic_sub_and_test(i,v) (atomic_sub_return((i),(v)) == 0) + #define ATOMIC_INIT(i) ((atomic_t) { (i) }) #define smp_mb__before_atomic_dec() smp_mb() @@ -267,6 +269,7 @@ atomic64_read(const atomic64_t *v) #define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0) #define atomic64_dec_and_test(v) (atomic64_dec_return(v) == 0) +#define atomic64_sub_and_test(i,v) (atomic64_sub_return((i),(v)) == 0) #endif /* __LP64__ */ -- cgit v1.2.3 From 5274f052e7b3dbd81935772eb551dfd0325dfa9d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 30 Mar 2006 15:15:30 +0200 Subject: [PATCH] Introduce sys_splice() system call This adds support for the sys_splice system call. Using a pipe as a transport, it can connect to files or sockets (latter as output only). From the splice.c comments: "splice": joining two ropes together by interweaving their strands. This is the "extended pipe" functionality, where a pipe is used as an arbitrary in-memory buffer. Think of a pipe as a small kernel buffer that you can use to transfer data from one end to the other. The traditional unix read/write is extended with a "splice()" operation that transfers data buffers to or from a pipe buffer. Named by Larry McVoy, original implementation from Linus, extended by Jens to support splicing to files and fixing the initial implementation bugs. 
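To make the data path concrete, here is a hedged userspace sketch of the file -> pipe -> socket pattern described above. It uses the splice(2) interface as it later stabilized in glibc (six arguments with optional offsets); the exact prototype wired up by this initial patch may differ, so treat it purely as an illustration of the model, with splice_file_to_socket being a made-up helper.

#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

/* Move up to `len` bytes from a file to a socket without ever copying
 * the payload through user space: the pipe is the in-kernel buffer.
 */
static ssize_t splice_file_to_socket(int file_fd, int sock_fd, size_t len)
{
	int pipefd[2];
	ssize_t total = 0;

	if (pipe(pipefd) < 0)
		return -1;

	while (len > 0) {
		/* file -> pipe: queue page references into the pipe buffer */
		ssize_t in = splice(file_fd, NULL, pipefd[1], NULL, len,
				    SPLICE_F_MOVE);
		if (in <= 0)
			break;
		len -= in;

		/* pipe -> socket: drain what was just queued */
		while (in > 0) {
			ssize_t out = splice(pipefd[0], NULL, sock_fd, NULL,
					     in, SPLICE_F_MOVE | SPLICE_F_MORE);
			if (out <= 0)
				goto done;
			in -= out;
			total += out;
		}
	}
done:
	close(pipefd[0]);
	close(pipefd[1]);
	return total;
}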
Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- arch/i386/kernel/syscall_table.S | 1 + arch/ia64/kernel/entry.S | 1 + fs/Makefile | 2 +- fs/ext2/file.c | 2 + fs/ext3/file.c | 2 + fs/pipe.c | 33 ++- fs/reiserfs/file.c | 2 + fs/splice.c | 612 +++++++++++++++++++++++++++++++++++++++ include/asm-i386/unistd.h | 3 +- include/asm-ia64/unistd.h | 3 +- include/asm-powerpc/unistd.h | 3 +- include/asm-x86_64/unistd.h | 4 +- include/linux/fs.h | 4 + include/linux/syscalls.h | 2 + net/socket.c | 6 +- 15 files changed, 669 insertions(+), 11 deletions(-) create mode 100644 fs/splice.c (limited to 'include') diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index 326595f3fa4d..ce3ef4fa0551 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S @@ -312,3 +312,4 @@ ENTRY(sys_call_table) .long sys_unshare /* 310 */ .long sys_set_robust_list .long sys_get_robust_list + .long sys_splice diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 0e3eda99e549..750e8e7fbdc3 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1605,5 +1605,6 @@ sys_call_table: data8 sys_ni_syscall // reserved for pselect data8 sys_ni_syscall // 1295 reserved for ppoll data8 sys_unshare + data8 sys_splice .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls diff --git a/fs/Makefile b/fs/Makefile index 080b3867be4d..f3a4f7077175 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -10,7 +10,7 @@ obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \ ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \ - ioprio.o pnode.o drop_caches.o + ioprio.o pnode.o drop_caches.o splice.o obj-$(CONFIG_INOTIFY) += inotify.o obj-$(CONFIG_EPOLL) += eventpoll.o diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 509cceca04db..23e2c7ccec1d 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -53,6 +53,8 @@ const struct file_operations ext2_file_operations = { .readv = generic_file_readv, .writev = generic_file_writev, .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, + .splice_write = generic_file_splice_write, }; #ifdef CONFIG_EXT2_FS_XIP diff --git a/fs/ext3/file.c b/fs/ext3/file.c index 783a796220bb..1efefb630ea9 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -119,6 +119,8 @@ const struct file_operations ext3_file_operations = { .release = ext3_release_file, .fsync = ext3_sync_file, .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, + .splice_write = generic_file_splice_write, }; struct inode_operations ext3_file_inode_operations = { diff --git a/fs/pipe.c b/fs/pipe.c index e2f4f1d9ffc2..2414bf270db6 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -94,11 +95,20 @@ static void anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buff { struct page *page = buf->page; - if (info->tmp_page) { - __free_page(page); + /* + * If nobody else uses this page, and we don't already have a + * temporary page, let's keep track of it as a one-deep + * allocation cache + */ + if (page_count(page) == 1 && !info->tmp_page) { + info->tmp_page = page; return; } - info->tmp_page = page; + + /* + * Otherwise just release our reference to it + */ + page_cache_release(page); } static void *anon_pipe_buf_map(struct file *file, struct pipe_inode_info *info, struct 
pipe_buffer *buf) @@ -152,6 +162,11 @@ pipe_readv(struct file *filp, const struct iovec *_iov, chars = total_len; addr = ops->map(filp, info, buf); + if (IS_ERR(addr)) { + if (!ret) + ret = PTR_ERR(addr); + break; + } error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars); ops->unmap(info, buf); if (unlikely(error)) { @@ -254,8 +269,16 @@ pipe_writev(struct file *filp, const struct iovec *_iov, struct pipe_buf_operations *ops = buf->ops; int offset = buf->offset + buf->len; if (ops->can_merge && offset + chars <= PAGE_SIZE) { - void *addr = ops->map(filp, info, buf); - int error = pipe_iov_copy_from_user(offset + addr, iov, chars); + void *addr; + int error; + + addr = ops->map(filp, info, buf); + if (IS_ERR(addr)) { + error = PTR_ERR(addr); + goto out; + } + error = pipe_iov_copy_from_user(offset + addr, iov, + chars); ops->unmap(info, buf); ret = error; do_wakeup = 1; diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 010094d14da6..cf6e1cf40351 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -1576,6 +1576,8 @@ const struct file_operations reiserfs_file_operations = { .sendfile = generic_file_sendfile, .aio_read = generic_file_aio_read, .aio_write = reiserfs_aio_write, + .splice_read = generic_file_splice_read, + .splice_write = generic_file_splice_write, }; struct inode_operations reiserfs_file_inode_operations = { diff --git a/fs/splice.c b/fs/splice.c new file mode 100644 index 000000000000..efa47c1c4e13 --- /dev/null +++ b/fs/splice.c @@ -0,0 +1,612 @@ +/* + * "splice": joining two ropes together by interweaving their strands. + * + * This is the "extended pipe" functionality, where a pipe is used as + * an arbitrary in-memory buffer. Think of a pipe as a small kernel + * buffer that you can use to transfer data from one end to the other. + * + * The traditional unix read/write is extended with a "splice()" operation + * that transfers data buffers to or from a pipe buffer. + * + * Named by Larry McVoy, original implementation from Linus, extended by + * Jens to support splicing to files and fixing the initial implementation + * bugs. 
+ * + * Copyright (C) 2005 Jens Axboe + * Copyright (C) 2005 Linus Torvalds + * + */ +#include +#include +#include +#include +#include + +/* + * Passed to the actors + */ +struct splice_desc { + unsigned int len, total_len; /* current and remaining length */ + unsigned int flags; /* splice flags */ + struct file *file; /* file to read/write */ + loff_t pos; /* file position */ +}; + +static void page_cache_pipe_buf_release(struct pipe_inode_info *info, + struct pipe_buffer *buf) +{ + page_cache_release(buf->page); + buf->page = NULL; +} + +static void *page_cache_pipe_buf_map(struct file *file, + struct pipe_inode_info *info, + struct pipe_buffer *buf) +{ + struct page *page = buf->page; + + lock_page(page); + + if (!PageUptodate(page)) { + unlock_page(page); + return ERR_PTR(-EIO); + } + + if (!page->mapping) { + unlock_page(page); + return ERR_PTR(-ENODATA); + } + + return kmap(buf->page); +} + +static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, + struct pipe_buffer *buf) +{ + unlock_page(buf->page); + kunmap(buf->page); +} + +static struct pipe_buf_operations page_cache_pipe_buf_ops = { + .can_merge = 0, + .map = page_cache_pipe_buf_map, + .unmap = page_cache_pipe_buf_unmap, + .release = page_cache_pipe_buf_release, +}; + +static ssize_t move_to_pipe(struct inode *inode, struct page **pages, + int nr_pages, unsigned long offset, + unsigned long len) +{ + struct pipe_inode_info *info; + int ret, do_wakeup, i; + + ret = 0; + do_wakeup = 0; + i = 0; + + mutex_lock(PIPE_MUTEX(*inode)); + + info = inode->i_pipe; + for (;;) { + int bufs; + + if (!PIPE_READERS(*inode)) { + send_sig(SIGPIPE, current, 0); + if (!ret) + ret = -EPIPE; + break; + } + + bufs = info->nrbufs; + if (bufs < PIPE_BUFFERS) { + int newbuf = (info->curbuf + bufs) & (PIPE_BUFFERS - 1); + struct pipe_buffer *buf = info->bufs + newbuf; + struct page *page = pages[i++]; + unsigned long this_len; + + this_len = PAGE_CACHE_SIZE - offset; + if (this_len > len) + this_len = len; + + buf->page = page; + buf->offset = offset; + buf->len = this_len; + buf->ops = &page_cache_pipe_buf_ops; + info->nrbufs = ++bufs; + do_wakeup = 1; + + ret += this_len; + len -= this_len; + offset = 0; + if (!--nr_pages) + break; + if (!len) + break; + if (bufs < PIPE_BUFFERS) + continue; + + break; + } + + if (signal_pending(current)) { + if (!ret) + ret = -ERESTARTSYS; + break; + } + + if (do_wakeup) { + wake_up_interruptible_sync(PIPE_WAIT(*inode)); + kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, + POLL_IN); + do_wakeup = 0; + } + + PIPE_WAITING_WRITERS(*inode)++; + pipe_wait(inode); + PIPE_WAITING_WRITERS(*inode)--; + } + + mutex_unlock(PIPE_MUTEX(*inode)); + + if (do_wakeup) { + wake_up_interruptible(PIPE_WAIT(*inode)); + kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); + } + + while (i < nr_pages) + page_cache_release(pages[i++]); + + return ret; +} + +static int __generic_file_splice_read(struct file *in, struct inode *pipe, + size_t len) +{ + struct address_space *mapping = in->f_mapping; + unsigned int offset, nr_pages; + struct page *pages[PIPE_BUFFERS], *shadow[PIPE_BUFFERS]; + struct page *page; + pgoff_t index, pidx; + int i, j; + + index = in->f_pos >> PAGE_CACHE_SHIFT; + offset = in->f_pos & ~PAGE_CACHE_MASK; + nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + + if (nr_pages > PIPE_BUFFERS) + nr_pages = PIPE_BUFFERS; + + /* + * initiate read-ahead on this page range + */ + do_page_cache_readahead(mapping, in, index, nr_pages); + + /* + * Get as many pages from the page cache as possible.. 
+ * Start IO on the page cache entries we create (we + * can assume that any pre-existing ones we find have + * already had IO started on them). + */ + i = find_get_pages(mapping, index, nr_pages, pages); + + /* + * common case - we found all pages and they are contiguous, + * kick them off + */ + if (i && (pages[i - 1]->index == index + i - 1)) + goto splice_them; + + /* + * fill shadow[] with pages at the right locations, so we only + * have to fill holes + */ + memset(shadow, 0, i * sizeof(struct page *)); + for (j = 0, pidx = index; j < i; pidx++, j++) + shadow[pages[j]->index - pidx] = pages[j]; + + /* + * now fill in the holes + */ + for (i = 0, pidx = index; i < nr_pages; pidx++, i++) { + int error; + + if (shadow[i]) + continue; + + /* + * no page there, look one up / create it + */ + page = find_or_create_page(mapping, pidx, + mapping_gfp_mask(mapping)); + if (!page) + break; + + if (PageUptodate(page)) + unlock_page(page); + else { + error = mapping->a_ops->readpage(in, page); + + if (unlikely(error)) { + page_cache_release(page); + break; + } + } + shadow[i] = page; + } + + if (!i) { + for (i = 0; i < nr_pages; i++) { + if (shadow[i]) + page_cache_release(shadow[i]); + } + return 0; + } + + memcpy(pages, shadow, i * sizeof(struct page *)); + + /* + * Now we splice them into the pipe.. + */ +splice_them: + return move_to_pipe(pipe, pages, i, offset, len); +} + +ssize_t generic_file_splice_read(struct file *in, struct inode *pipe, + size_t len, unsigned int flags) +{ + ssize_t spliced; + int ret; + + ret = 0; + spliced = 0; + while (len) { + ret = __generic_file_splice_read(in, pipe, len); + + if (ret <= 0) + break; + + in->f_pos += ret; + len -= ret; + spliced += ret; + } + + if (spliced) + return spliced; + + return ret; +} + +/* + * Send 'len' bytes to socket from 'file' at position 'pos' using sendpage(). + */ +static int pipe_to_sendpage(struct pipe_inode_info *info, + struct pipe_buffer *buf, struct splice_desc *sd) +{ + struct file *file = sd->file; + loff_t pos = sd->pos; + unsigned int offset; + ssize_t ret; + void *ptr; + + /* + * sub-optimal, but we are limited by the pipe ->map. we don't + * need a kmap'ed buffer here, we just want to make sure we + * have the page pinned if the pipe page originates from the + * page cache + */ + ptr = buf->ops->map(file, info, buf); + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + + offset = pos & ~PAGE_CACHE_MASK; + + ret = file->f_op->sendpage(file, buf->page, offset, sd->len, &pos, + sd->len < sd->total_len); + + buf->ops->unmap(info, buf); + if (ret == sd->len) + return 0; + + return -EIO; +} + +/* + * This is a little more tricky than the file -> pipe splicing. There are + * basically three cases: + * + * - Destination page already exists in the address space and there + * are users of it. For that case we have no other option that + * copying the data. Tough luck. + * - Destination page already exists in the address space, but there + * are no users of it. Make sure it's uptodate, then drop it. Fall + * through to last case. + * - Destination page does not exist, we can add the pipe page to + * the page cache and avoid the copy. + * + * For now we just do the slower thing and always copy pages over, it's + * easier than migrating pages from the pipe to the target file. For the + * case of doing file | file splicing, the migrate approach had some LRU + * nastiness... 
+ */ +static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, + struct splice_desc *sd) +{ + struct file *file = sd->file; + struct address_space *mapping = file->f_mapping; + unsigned int offset; + struct page *page; + char *src, *dst; + pgoff_t index; + int ret; + + /* + * after this, page will be locked and unmapped + */ + src = buf->ops->map(file, info, buf); + if (IS_ERR(src)) + return PTR_ERR(src); + + index = sd->pos >> PAGE_CACHE_SHIFT; + offset = sd->pos & ~PAGE_CACHE_MASK; + +find_page: + ret = -ENOMEM; + page = find_or_create_page(mapping, index, mapping_gfp_mask(mapping)); + if (!page) + goto out; + + /* + * If the page is uptodate, it is also locked. If it isn't + * uptodate, we can mark it uptodate if we are filling the + * full page. Otherwise we need to read it in first... + */ + if (!PageUptodate(page)) { + if (sd->len < PAGE_CACHE_SIZE) { + ret = mapping->a_ops->readpage(file, page); + if (unlikely(ret)) + goto out; + + lock_page(page); + + if (!PageUptodate(page)) { + /* + * page got invalidated, repeat + */ + if (!page->mapping) { + unlock_page(page); + page_cache_release(page); + goto find_page; + } + ret = -EIO; + goto out; + } + } else { + WARN_ON(!PageLocked(page)); + SetPageUptodate(page); + } + } + + ret = mapping->a_ops->prepare_write(file, page, 0, sd->len); + if (ret) + goto out; + + dst = kmap_atomic(page, KM_USER0); + memcpy(dst + offset, src + buf->offset, sd->len); + flush_dcache_page(page); + kunmap_atomic(dst, KM_USER0); + + ret = mapping->a_ops->commit_write(file, page, 0, sd->len); + if (ret < 0) + goto out; + + set_page_dirty(page); + ret = write_one_page(page, 0); +out: + if (ret < 0) + unlock_page(page); + page_cache_release(page); + buf->ops->unmap(info, buf); + return ret; +} + +typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *, + struct splice_desc *); + +static ssize_t move_from_pipe(struct inode *inode, struct file *out, + size_t len, unsigned int flags, + splice_actor *actor) +{ + struct pipe_inode_info *info; + int ret, do_wakeup, err; + struct splice_desc sd; + + ret = 0; + do_wakeup = 0; + + sd.total_len = len; + sd.flags = flags; + sd.file = out; + sd.pos = out->f_pos; + + mutex_lock(PIPE_MUTEX(*inode)); + + info = inode->i_pipe; + for (;;) { + int bufs = info->nrbufs; + + if (bufs) { + int curbuf = info->curbuf; + struct pipe_buffer *buf = info->bufs + curbuf; + struct pipe_buf_operations *ops = buf->ops; + + sd.len = buf->len; + if (sd.len > sd.total_len) + sd.len = sd.total_len; + + err = actor(info, buf, &sd); + if (err) { + if (!ret && err != -ENODATA) + ret = err; + + break; + } + + ret += sd.len; + buf->offset += sd.len; + buf->len -= sd.len; + if (!buf->len) { + buf->ops = NULL; + ops->release(info, buf); + curbuf = (curbuf + 1) & (PIPE_BUFFERS - 1); + info->curbuf = curbuf; + info->nrbufs = --bufs; + do_wakeup = 1; + } + + sd.pos += sd.len; + sd.total_len -= sd.len; + if (!sd.total_len) + break; + } + + if (bufs) + continue; + if (!PIPE_WRITERS(*inode)) + break; + if (!PIPE_WAITING_WRITERS(*inode)) { + if (ret) + break; + } + + if (signal_pending(current)) { + if (!ret) + ret = -ERESTARTSYS; + break; + } + + if (do_wakeup) { + wake_up_interruptible_sync(PIPE_WAIT(*inode)); + kill_fasync(PIPE_FASYNC_WRITERS(*inode),SIGIO,POLL_OUT); + do_wakeup = 0; + } + + pipe_wait(inode); + } + + mutex_unlock(PIPE_MUTEX(*inode)); + + if (do_wakeup) { + wake_up_interruptible(PIPE_WAIT(*inode)); + kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT); + } + + 
mutex_lock(&out->f_mapping->host->i_mutex); + out->f_pos = sd.pos; + mutex_unlock(&out->f_mapping->host->i_mutex); + return ret; + +} + +ssize_t generic_file_splice_write(struct inode *inode, struct file *out, + size_t len, unsigned int flags) +{ + return move_from_pipe(inode, out, len, flags, pipe_to_file); +} + +ssize_t generic_splice_sendpage(struct inode *inode, struct file *out, + size_t len, unsigned int flags) +{ + return move_from_pipe(inode, out, len, flags, pipe_to_sendpage); +} + +static long do_splice_from(struct inode *pipe, struct file *out, size_t len, + unsigned int flags) +{ + loff_t pos; + int ret; + + if (!out->f_op || !out->f_op->splice_write) + return -EINVAL; + + if (!(out->f_mode & FMODE_WRITE)) + return -EBADF; + + pos = out->f_pos; + ret = rw_verify_area(WRITE, out, &pos, len); + if (unlikely(ret < 0)) + return ret; + + return out->f_op->splice_write(pipe, out, len, flags); +} + +static long do_splice_to(struct file *in, struct inode *pipe, size_t len, + unsigned int flags) +{ + loff_t pos, isize, left; + int ret; + + if (!in->f_op || !in->f_op->splice_read) + return -EINVAL; + + if (!(in->f_mode & FMODE_READ)) + return -EBADF; + + pos = in->f_pos; + ret = rw_verify_area(READ, in, &pos, len); + if (unlikely(ret < 0)) + return ret; + + isize = i_size_read(in->f_mapping->host); + if (unlikely(in->f_pos >= isize)) + return 0; + + left = isize - in->f_pos; + if (left < len) + len = left; + + return in->f_op->splice_read(in, pipe, len, flags); +} + +static long do_splice(struct file *in, struct file *out, size_t len, + unsigned int flags) +{ + struct inode *pipe; + + pipe = in->f_dentry->d_inode; + if (pipe->i_pipe) + return do_splice_from(pipe, out, len, flags); + + pipe = out->f_dentry->d_inode; + if (pipe->i_pipe) + return do_splice_to(in, pipe, len, flags); + + return -EINVAL; +} + +asmlinkage long sys_splice(int fdin, int fdout, size_t len, unsigned int flags) +{ + long error; + struct file *in, *out; + int fput_in, fput_out; + + if (unlikely(!len)) + return 0; + + error = -EBADF; + in = fget_light(fdin, &fput_in); + if (in) { + if (in->f_mode & FMODE_READ) { + out = fget_light(fdout, &fput_out); + if (out) { + if (out->f_mode & FMODE_WRITE) + error = do_splice(in, out, len, flags); + fput_light(out, fput_out); + } + } + + fput_light(in, fput_in); + } + + return error; +} diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h index 014e3562895b..789e9bdd0a40 100644 --- a/include/asm-i386/unistd.h +++ b/include/asm-i386/unistd.h @@ -318,8 +318,9 @@ #define __NR_unshare 310 #define __NR_set_robust_list 311 #define __NR_get_robust_list 312 +#define __NR_sys_splice 313 -#define NR_syscalls 313 +#define NR_syscalls 314 /* * user-visible error numbers are in the range -1 - -128: see diff --git a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h index 019956c613e4..36070c1014d8 100644 --- a/include/asm-ia64/unistd.h +++ b/include/asm-ia64/unistd.h @@ -285,12 +285,13 @@ #define __NR_faccessat 1293 /* 1294, 1295 reserved for pselect/ppoll */ #define __NR_unshare 1296 +#define __NR_splice 1297 #ifdef __KERNEL__ #include -#define NR_syscalls 273 /* length of syscall table */ +#define NR_syscalls 274 /* length of syscall table */ #define __ARCH_WANT_SYS_RT_SIGACTION diff --git a/include/asm-powerpc/unistd.h b/include/asm-powerpc/unistd.h index 1e990747dce7..536ba0873052 100644 --- a/include/asm-powerpc/unistd.h +++ b/include/asm-powerpc/unistd.h @@ -301,8 +301,9 @@ #define __NR_pselect6 280 #define __NR_ppoll 281 #define __NR_unshare 282 +#define __NR_splice 
283 -#define __NR_syscalls 283 +#define __NR_syscalls 284 #ifdef __KERNEL__ #define __NR__exit __NR_exit diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h index fcc516353087..f21ff2c1e960 100644 --- a/include/asm-x86_64/unistd.h +++ b/include/asm-x86_64/unistd.h @@ -609,8 +609,10 @@ __SYSCALL(__NR_unshare, sys_unshare) __SYSCALL(__NR_set_robust_list, sys_set_robust_list) #define __NR_get_robust_list 274 __SYSCALL(__NR_get_robust_list, sys_get_robust_list) +#define __NR_splice 275 +__SYSCALL(__NR_splice, sys_splice) -#define __NR_syscall_max __NR_get_robust_list +#define __NR_syscall_max __NR_splice #ifndef __NO_STUBS diff --git a/include/linux/fs.h b/include/linux/fs.h index 408fe89498f4..20fa5f6d7269 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1032,6 +1032,8 @@ struct file_operations { int (*check_flags)(int); int (*dir_notify)(struct file *filp, unsigned long arg); int (*flock) (struct file *, int, struct file_lock *); + ssize_t (*splice_write)(struct inode *, struct file *, size_t, unsigned int); + ssize_t (*splice_read)(struct file *, struct inode *, size_t, unsigned int); }; struct inode_operations { @@ -1609,6 +1611,8 @@ extern ssize_t generic_file_sendfile(struct file *, loff_t *, size_t, read_actor extern void do_generic_mapping_read(struct address_space *mapping, struct file_ra_state *, struct file *, loff_t *, read_descriptor_t *, read_actor_t); +extern ssize_t generic_file_splice_read(struct file *, struct inode *, size_t, unsigned int); +extern ssize_t generic_file_splice_write(struct inode *, struct file *, size_t, unsigned int); extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index e487e3b60f60..e78ffc7d5b56 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -569,5 +569,7 @@ asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user * filename, asmlinkage long compat_sys_openat(unsigned int dfd, const char __user *filename, int flags, int mode); asmlinkage long sys_unshare(unsigned long unshare_flags); +asmlinkage long sys_splice(int fdin, int fdout, size_t len, + unsigned int flags); #endif diff --git a/net/socket.c b/net/socket.c index fcd77eac0ccf..b13042f68c02 100644 --- a/net/socket.c +++ b/net/socket.c @@ -119,6 +119,9 @@ static ssize_t sock_writev(struct file *file, const struct iovec *vector, static ssize_t sock_sendpage(struct file *file, struct page *page, int offset, size_t size, loff_t *ppos, int more); +extern ssize_t generic_splice_sendpage(struct inode *inode, struct file *out, + size_t len, unsigned int flags); + /* * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear @@ -141,7 +144,8 @@ static struct file_operations socket_file_ops = { .fasync = sock_fasync, .readv = sock_readv, .writev = sock_writev, - .sendpage = sock_sendpage + .sendpage = sock_sendpage, + .splice_write = generic_splice_sendpage, }; /* -- cgit v1.2.3 From 5abc97aa25b2c41413b3a520faee83f2282d9f18 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 30 Mar 2006 15:16:46 +0200 Subject: [PATCH] splice: add support for SPLICE_F_MOVE flag This enables the caller to migrate pages from one address space page cache to another. In buzz word marketing, you can do zero-copy file copies! 
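For illustration only (not part of this patch), a hypothetical helper for such a copy simply passes the new flag on both hops; when a page cannot be stolen, the kernel silently falls back to copying the data. The syscall number is again the i386 value from the previous patch:

#include <unistd.h>
#include <sys/syscall.h>

#define __NR_sys_splice	313	/* i386 number from the previous patch */
#define SPLICE_F_MOVE	0x01	/* as defined by this patch */

static long copy_chunk(int src_fd, int pipe_rd, int pipe_wr, int dst_fd, size_t len)
{
	long n;

	/* source file -> pipe, asking the kernel to steal the pages */
	n = syscall(__NR_sys_splice, src_fd, pipe_wr, len, SPLICE_F_MOVE);
	if (n <= 0)
		return n;

	/* pipe -> destination file, preferring page moves over memcpy */
	return syscall(__NR_sys_splice, pipe_rd, dst_fd, n, SPLICE_F_MOVE);
}
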
Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- fs/pipe.c | 8 +++ fs/splice.c | 121 ++++++++++++++++++++++++++++++++-------------- include/linux/pipe_fs_i.h | 8 +++ 3 files changed, 100 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/fs/pipe.c b/fs/pipe.c index 2414bf270db6..109a102c150d 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -121,11 +121,19 @@ static void anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer kunmap(buf->page); } +static int anon_pipe_buf_steal(struct pipe_inode_info *info, + struct pipe_buffer *buf) +{ + buf->stolen = 1; + return 0; +} + static struct pipe_buf_operations anon_pipe_buf_ops = { .can_merge = 1, .map = anon_pipe_buf_map, .unmap = anon_pipe_buf_unmap, .release = anon_pipe_buf_release, + .steal = anon_pipe_buf_steal, }; static ssize_t diff --git a/fs/splice.c b/fs/splice.c index efa47c1c4e13..4a026f95884f 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -21,6 +21,7 @@ #include #include #include +#include /* * Passed to the actors @@ -32,11 +33,37 @@ struct splice_desc { loff_t pos; /* file position */ }; +static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, + struct pipe_buffer *buf) +{ + struct page *page = buf->page; + + WARN_ON(!PageLocked(page)); + WARN_ON(!PageUptodate(page)); + + if (!remove_mapping(page_mapping(page), page)) + return 1; + + if (PageLRU(page)) { + struct zone *zone = page_zone(page); + + spin_lock_irq(&zone->lru_lock); + BUG_ON(!PageLRU(page)); + __ClearPageLRU(page); + del_page_from_lru(zone, page); + spin_unlock_irq(&zone->lru_lock); + } + + buf->stolen = 1; + return 0; +} + static void page_cache_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buffer *buf) { page_cache_release(buf->page); buf->page = NULL; + buf->stolen = 0; } static void *page_cache_pipe_buf_map(struct file *file, @@ -63,7 +90,8 @@ static void *page_cache_pipe_buf_map(struct file *file, static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer *buf) { - unlock_page(buf->page); + if (!buf->stolen) + unlock_page(buf->page); kunmap(buf->page); } @@ -72,6 +100,7 @@ static struct pipe_buf_operations page_cache_pipe_buf_ops = { .map = page_cache_pipe_buf_map, .unmap = page_cache_pipe_buf_unmap, .release = page_cache_pipe_buf_release, + .steal = page_cache_pipe_buf_steal, }; static ssize_t move_to_pipe(struct inode *inode, struct page **pages, @@ -336,8 +365,8 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, struct address_space *mapping = file->f_mapping; unsigned int offset; struct page *page; - char *src, *dst; pgoff_t index; + char *src; int ret; /* @@ -350,40 +379,54 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, index = sd->pos >> PAGE_CACHE_SHIFT; offset = sd->pos & ~PAGE_CACHE_MASK; -find_page: - ret = -ENOMEM; - page = find_or_create_page(mapping, index, mapping_gfp_mask(mapping)); - if (!page) - goto out; - /* - * If the page is uptodate, it is also locked. If it isn't - * uptodate, we can mark it uptodate if we are filling the - * full page. Otherwise we need to read it in first... 
+ * reuse buf page, if SPLICE_F_MOVE is set */ - if (!PageUptodate(page)) { - if (sd->len < PAGE_CACHE_SIZE) { - ret = mapping->a_ops->readpage(file, page); - if (unlikely(ret)) - goto out; - - lock_page(page); - - if (!PageUptodate(page)) { - /* - * page got invalidated, repeat - */ - if (!page->mapping) { - unlock_page(page); - page_cache_release(page); - goto find_page; + if (sd->flags & SPLICE_F_MOVE) { + if (buf->ops->steal(info, buf)) + goto find_page; + + page = buf->page; + if (add_to_page_cache_lru(page, mapping, index, + mapping_gfp_mask(mapping))) + goto find_page; + } else { +find_page: + ret = -ENOMEM; + page = find_or_create_page(mapping, index, + mapping_gfp_mask(mapping)); + if (!page) + goto out; + + /* + * If the page is uptodate, it is also locked. If it isn't + * uptodate, we can mark it uptodate if we are filling the + * full page. Otherwise we need to read it in first... + */ + if (!PageUptodate(page)) { + if (sd->len < PAGE_CACHE_SIZE) { + ret = mapping->a_ops->readpage(file, page); + if (unlikely(ret)) + goto out; + + lock_page(page); + + if (!PageUptodate(page)) { + /* + * page got invalidated, repeat + */ + if (!page->mapping) { + unlock_page(page); + page_cache_release(page); + goto find_page; + } + ret = -EIO; + goto out; } - ret = -EIO; - goto out; + } else { + WARN_ON(!PageLocked(page)); + SetPageUptodate(page); } - } else { - WARN_ON(!PageLocked(page)); - SetPageUptodate(page); } } @@ -391,10 +434,13 @@ find_page: if (ret) goto out; - dst = kmap_atomic(page, KM_USER0); - memcpy(dst + offset, src + buf->offset, sd->len); - flush_dcache_page(page); - kunmap_atomic(dst, KM_USER0); + if (!buf->stolen) { + char *dst = kmap_atomic(page, KM_USER0); + + memcpy(dst + offset, src + buf->offset, sd->len); + flush_dcache_page(page); + kunmap_atomic(dst, KM_USER0); + } ret = mapping->a_ops->commit_write(file, page, 0, sd->len); if (ret < 0) @@ -405,7 +451,8 @@ find_page: out: if (ret < 0) unlock_page(page); - page_cache_release(page); + if (!buf->stolen) + page_cache_release(page); buf->ops->unmap(info, buf); return ret; } diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index b12e59c75752..75c7f55023ab 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -9,6 +9,7 @@ struct pipe_buffer { struct page *page; unsigned int offset, len; struct pipe_buf_operations *ops; + unsigned int stolen; }; struct pipe_buf_operations { @@ -16,6 +17,7 @@ struct pipe_buf_operations { void * (*map)(struct file *, struct pipe_inode_info *, struct pipe_buffer *); void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *); void (*release)(struct pipe_inode_info *, struct pipe_buffer *); + int (*steal)(struct pipe_inode_info *, struct pipe_buffer *); }; struct pipe_inode_info { @@ -53,4 +55,10 @@ void pipe_wait(struct inode * inode); struct inode* pipe_new(struct inode* inode); void free_pipe_info(struct inode* inode); +/* + * splice is tied to pipes as a transport (at least for now), so we'll just + * add the splice flags here. 
+ */ +#define SPLICE_F_MOVE (0x01) /* move pages instead of copying */ + #endif -- cgit v1.2.3 From f19180056ea09ec6a5d32e741234451a1e6eba4d Mon Sep 17 00:00:00 2001 From: "Zhang, Yanmin" Date: Mon, 27 Feb 2006 11:37:45 +0800 Subject: [IA64] Export cpu cache info by sysfs The patch exports 8 attributes of cpu cache info under /sys/devices/system/cpu/cpuX/cache/indexX: 1) level 2) type 3) coherency_line_size 4) ways_of_associativity 5) size 6) shared_cpu_map 7) attributes 8) number_of_sets: number_of_sets=size/ways_of_associativity/coherency_line_size. Signed-off-by: Zhang Yanmin Signed-off-by: Tony Luck --- arch/ia64/kernel/topology.c | 367 ++++++++++++++++++++++++++++++++++++++++++++ include/asm-ia64/pal.h | 28 ++++ 2 files changed, 395 insertions(+) (limited to 'include') diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 3b6fd798c4d6..b47476d655f1 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -9,6 +9,8 @@ * 2002/08/07 Erich Focht * Populate cpu entries in sysfs for non-numa systems as well * Intel Corporation - Ashok Raj + * 02/27/2006 Zhang, Yanmin + * Populate cpu cache entries in sysfs for cpu cache info */ #include @@ -19,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -101,3 +104,367 @@ out: } subsys_initcall(topology_init); + + +/* + * Export cpu cache information through sysfs + */ + +/* + * A bunch of string array to get pretty printing + */ +static const char *cache_types[] = { + "", /* not used */ + "Instruction", + "Data", + "Unified" /* unified */ +}; + +static const char *cache_mattrib[]={ + "WriteThrough", + "WriteBack", + "", /* reserved */ + "" /* reserved */ +}; + +struct cache_info { + pal_cache_config_info_t cci; + cpumask_t shared_cpu_map; + int level; + int type; + struct kobject kobj; +}; + +struct cpu_cache_info { + struct cache_info *cache_leaves; + int num_cache_leaves; + struct kobject kobj; +}; + +static struct cpu_cache_info all_cpu_cache_info[NR_CPUS]; +#define LEAF_KOBJECT_PTR(x,y) (&all_cpu_cache_info[x].cache_leaves[y]) + +#ifdef CONFIG_SMP +static void cache_shared_cpu_map_setup( unsigned int cpu, + struct cache_info * this_leaf) +{ + pal_cache_shared_info_t csi; + int num_shared, i = 0; + unsigned int j; + + if (cpu_data(cpu)->threads_per_core <= 1 && + cpu_data(cpu)->cores_per_socket <= 1) { + cpu_set(cpu, this_leaf->shared_cpu_map); + return; + } + + if (ia64_pal_cache_shared_info(this_leaf->level, + this_leaf->type, + 0, + &csi) != PAL_STATUS_SUCCESS) + return; + + num_shared = (int) csi.num_shared; + do { + for_each_cpu(j) + if (cpu_data(cpu)->socket_id == cpu_data(j)->socket_id + && cpu_data(j)->core_id == csi.log1_cid + && cpu_data(j)->thread_id == csi.log1_tid) + cpu_set(j, this_leaf->shared_cpu_map); + + i++; + } while (i < num_shared && + ia64_pal_cache_shared_info(this_leaf->level, + this_leaf->type, + i, + &csi) == PAL_STATUS_SUCCESS); +} +#else +static void cache_shared_cpu_map_setup(unsigned int cpu, + struct cache_info * this_leaf) +{ + cpu_set(cpu, this_leaf->shared_cpu_map); + return; +} +#endif + +static ssize_t show_coherency_line_size(struct cache_info *this_leaf, + char *buf) +{ + return sprintf(buf, "%u\n", 1 << this_leaf->cci.pcci_line_size); +} + +static ssize_t show_ways_of_associativity(struct cache_info *this_leaf, + char *buf) +{ + return sprintf(buf, "%u\n", this_leaf->cci.pcci_assoc); +} + +static ssize_t show_attributes(struct cache_info *this_leaf, char *buf) +{ + return sprintf(buf, + "%s\n", + 
cache_mattrib[this_leaf->cci.pcci_cache_attr]); +} + +static ssize_t show_size(struct cache_info *this_leaf, char *buf) +{ + return sprintf(buf, "%uK\n", this_leaf->cci.pcci_cache_size / 1024); +} + +static ssize_t show_number_of_sets(struct cache_info *this_leaf, char *buf) +{ + unsigned number_of_sets = this_leaf->cci.pcci_cache_size; + number_of_sets /= this_leaf->cci.pcci_assoc; + number_of_sets /= 1 << this_leaf->cci.pcci_line_size; + + return sprintf(buf, "%u\n", number_of_sets); +} + +static ssize_t show_shared_cpu_map(struct cache_info *this_leaf, char *buf) +{ + ssize_t len; + cpumask_t shared_cpu_map; + + cpus_and(shared_cpu_map, this_leaf->shared_cpu_map, cpu_online_map); + len = cpumask_scnprintf(buf, NR_CPUS+1, shared_cpu_map); + len += sprintf(buf+len, "\n"); + return len; +} + +static ssize_t show_type(struct cache_info *this_leaf, char *buf) +{ + int type = this_leaf->type + this_leaf->cci.pcci_unified; + return sprintf(buf, "%s\n", cache_types[type]); +} + +static ssize_t show_level(struct cache_info *this_leaf, char *buf) +{ + return sprintf(buf, "%u\n", this_leaf->level); +} + +struct cache_attr { + struct attribute attr; + ssize_t (*show)(struct cache_info *, char *); + ssize_t (*store)(struct cache_info *, const char *, size_t count); +}; + +#ifdef define_one_ro + #undef define_one_ro +#endif +#define define_one_ro(_name) \ + static struct cache_attr _name = \ +__ATTR(_name, 0444, show_##_name, NULL) + +define_one_ro(level); +define_one_ro(type); +define_one_ro(coherency_line_size); +define_one_ro(ways_of_associativity); +define_one_ro(size); +define_one_ro(number_of_sets); +define_one_ro(shared_cpu_map); +define_one_ro(attributes); + +static struct attribute * cache_default_attrs[] = { + &type.attr, + &level.attr, + &coherency_line_size.attr, + &ways_of_associativity.attr, + &attributes.attr, + &size.attr, + &number_of_sets.attr, + &shared_cpu_map.attr, + NULL +}; + +#define to_object(k) container_of(k, struct cache_info, kobj) +#define to_attr(a) container_of(a, struct cache_attr, attr) + +static ssize_t cache_show(struct kobject * kobj, struct attribute * attr, char * buf) +{ + struct cache_attr *fattr = to_attr(attr); + struct cache_info *this_leaf = to_object(kobj); + ssize_t ret; + + ret = fattr->show ? 
fattr->show(this_leaf, buf) : 0; + return ret; +} + +static struct sysfs_ops cache_sysfs_ops = { + .show = cache_show +}; + +static struct kobj_type cache_ktype = { + .sysfs_ops = &cache_sysfs_ops, + .default_attrs = cache_default_attrs, +}; + +static struct kobj_type cache_ktype_percpu_entry = { + .sysfs_ops = &cache_sysfs_ops, +}; + +static void __cpuinit cpu_cache_sysfs_exit(unsigned int cpu) +{ + if (all_cpu_cache_info[cpu].cache_leaves) { + kfree(all_cpu_cache_info[cpu].cache_leaves); + all_cpu_cache_info[cpu].cache_leaves = NULL; + } + all_cpu_cache_info[cpu].num_cache_leaves = 0; + memset(&all_cpu_cache_info[cpu].kobj, 0, sizeof(struct kobject)); + + return; +} + +static int __cpuinit cpu_cache_sysfs_init(unsigned int cpu) +{ + u64 i, levels, unique_caches; + pal_cache_config_info_t cci; + int j; + s64 status; + struct cache_info *this_cache; + int num_cache_leaves = 0; + + if ((status = ia64_pal_cache_summary(&levels, &unique_caches)) != 0) { + printk(KERN_ERR "ia64_pal_cache_summary=%ld\n", status); + return -1; + } + + this_cache=kzalloc(sizeof(struct cache_info)*unique_caches, + GFP_KERNEL); + if (this_cache == NULL) + return -ENOMEM; + + for (i=0; i < levels; i++) { + for (j=2; j >0 ; j--) { + if ((status=ia64_pal_cache_config_info(i,j, &cci)) != + PAL_STATUS_SUCCESS) + continue; + + this_cache[num_cache_leaves].cci = cci; + this_cache[num_cache_leaves].level = i + 1; + this_cache[num_cache_leaves].type = j; + + cache_shared_cpu_map_setup(cpu, + &this_cache[num_cache_leaves]); + num_cache_leaves ++; + } + } + + all_cpu_cache_info[cpu].cache_leaves = this_cache; + all_cpu_cache_info[cpu].num_cache_leaves = num_cache_leaves; + + memset(&all_cpu_cache_info[cpu].kobj, 0, sizeof(struct kobject)); + + return 0; +} + +/* Add cache interface for CPU device */ +static int __cpuinit cache_add_dev(struct sys_device * sys_dev) +{ + unsigned int cpu = sys_dev->id; + unsigned long i, j; + struct cache_info *this_object; + int retval = 0; + cpumask_t oldmask; + + if (all_cpu_cache_info[cpu].kobj.parent) + return 0; + + oldmask = current->cpus_allowed; + retval = set_cpus_allowed(current, cpumask_of_cpu(cpu)); + if (unlikely(retval)) + return retval; + + retval = cpu_cache_sysfs_init(cpu); + set_cpus_allowed(current, oldmask); + if (unlikely(retval < 0)) + return retval; + + all_cpu_cache_info[cpu].kobj.parent = &sys_dev->kobj; + kobject_set_name(&all_cpu_cache_info[cpu].kobj, "%s", "cache"); + all_cpu_cache_info[cpu].kobj.ktype = &cache_ktype_percpu_entry; + retval = kobject_register(&all_cpu_cache_info[cpu].kobj); + + for (i = 0; i < all_cpu_cache_info[cpu].num_cache_leaves; i++) { + this_object = LEAF_KOBJECT_PTR(cpu,i); + this_object->kobj.parent = &all_cpu_cache_info[cpu].kobj; + kobject_set_name(&(this_object->kobj), "index%1lu", i); + this_object->kobj.ktype = &cache_ktype; + retval = kobject_register(&(this_object->kobj)); + if (unlikely(retval)) { + for (j = 0; j < i; j++) { + kobject_unregister( + &(LEAF_KOBJECT_PTR(cpu,j)->kobj)); + } + kobject_unregister(&all_cpu_cache_info[cpu].kobj); + cpu_cache_sysfs_exit(cpu); + break; + } + } + return retval; +} + +/* Remove cache interface for CPU device */ +static int __cpuinit cache_remove_dev(struct sys_device * sys_dev) +{ + unsigned int cpu = sys_dev->id; + unsigned long i; + + for (i = 0; i < all_cpu_cache_info[cpu].num_cache_leaves; i++) + kobject_unregister(&(LEAF_KOBJECT_PTR(cpu,i)->kobj)); + + if (all_cpu_cache_info[cpu].kobj.parent) { + kobject_unregister(&all_cpu_cache_info[cpu].kobj); + memset(&all_cpu_cache_info[cpu].kobj, + 0, 
+ sizeof(struct kobject)); + } + + cpu_cache_sysfs_exit(cpu); + + return 0; +} + +/* + * When a cpu is hot-plugged, do a check and initiate + * cache kobject if necessary + */ +static int __cpuinit cache_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct sys_device *sys_dev; + + sys_dev = get_cpu_sysdev(cpu); + switch (action) { + case CPU_ONLINE: + cache_add_dev(sys_dev); + break; + case CPU_DEAD: + cache_remove_dev(sys_dev); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block cache_cpu_notifier = +{ + .notifier_call = cache_cpu_callback +}; + +static int __cpuinit cache_sysfs_init(void) +{ + int i; + + for_each_online_cpu(i) { + cache_cpu_callback(&cache_cpu_notifier, CPU_ONLINE, + (void *)(long)i); + } + + register_cpu_notifier(&cache_cpu_notifier); + + return 0; +} + +device_initcall(cache_sysfs_init); + diff --git a/include/asm-ia64/pal.h b/include/asm-ia64/pal.h index 4e7e6f23b08c..5d229c5953e9 100644 --- a/include/asm-ia64/pal.h +++ b/include/asm-ia64/pal.h @@ -68,6 +68,7 @@ #define PAL_SHUTDOWN 40 /* enter processor shutdown state */ #define PAL_PREFETCH_VISIBILITY 41 /* Make Processor Prefetches Visible */ #define PAL_LOGICAL_TO_PHYSICAL 42 /* returns information on logical to physical processor mapping */ +#define PAL_CACHE_SHARED_INFO 43 /* returns information on caches shared by logical processor */ #define PAL_COPY_PAL 256 /* relocate PAL procedures and PAL PMI */ #define PAL_HALT_INFO 257 /* return the low power capabilities of processor */ @@ -1647,6 +1648,33 @@ ia64_pal_logical_to_phys(u64 proc_number, pal_logical_to_physical_t *mapping) return iprv.status; } + +typedef struct pal_cache_shared_info_s +{ + u64 num_shared; + pal_proc_n_log_info1_t ppli1; + pal_proc_n_log_info2_t ppli2; +} pal_cache_shared_info_t; + +/* Get information on logical to physical processor mappings. */ +static inline s64 +ia64_pal_cache_shared_info(u64 level, + u64 type, + u64 proc_number, + pal_cache_shared_info_t *info) +{ + struct ia64_pal_retval iprv; + + PAL_CALL(iprv, PAL_CACHE_SHARED_INFO, level, type, proc_number); + + if (iprv.status == PAL_STATUS_SUCCESS) { + info->num_shared = iprv.v0; + info->ppli1.ppli1_data = iprv.v1; + info->ppli2.ppli2_data = iprv.v2; + } + + return iprv.status; +} #endif /* __ASSEMBLY__ */ #endif /* _ASM_IA64_PAL_H */ -- cgit v1.2.3 From 0803dbed7a23721d091639c9e173c0389dcd524a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 31 Mar 2006 02:25:46 -0800 Subject: [TCP]: Kill unused extern decl for tcp_v4_hash_connecting() Noticed by Alan Menegotto. Signed-off-by: David S. Miller --- include/net/tcp.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 9418f4d1afbb..3c989db8a7aa 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -405,9 +405,6 @@ extern int tcp_disconnect(struct sock *sk, int flags); extern void tcp_unhash(struct sock *sk); -extern int tcp_v4_hash_connecting(struct sock *sk); - - /* From syncookies.c */ extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt); -- cgit v1.2.3 From 025be81e83043f20538dcced1e12c5f8d152fbdb Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 31 Mar 2006 02:27:06 -0800 Subject: [NET]: Allow skb headroom to be overridden Previously we added NET_IP_ALIGN so an architecture can override the padding done to align headers. The next step is to allow the skb headroom to be overridden. 
We currently always reserve 16 bytes to grow into, meaning all DMAs start 16 bytes into a cacheline. On ppc64 we really want DMA writes to start on a cacheline boundary, so we increase that headroom to one cacheline. Signed-off-by: Anton Blanchard Signed-off-by: David S. Miller --- include/asm-powerpc/system.h | 5 ++++- include/linux/skbuff.h | 29 +++++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/asm-powerpc/system.h b/include/asm-powerpc/system.h index 65f5a7b2646b..d075725bf444 100644 --- a/include/asm-powerpc/system.h +++ b/include/asm-powerpc/system.h @@ -365,8 +365,11 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, * powers of 2 writes until it reaches sufficient alignment). * * Based on this we disable the IP header alignment in network drivers. + * We also modify NET_SKB_PAD to be a cacheline in size, thus maintaining + * cacheline alignment of buffers. */ -#define NET_IP_ALIGN 0 +#define NET_IP_ALIGN 0 +#define NET_SKB_PAD L1_CACHE_BYTES #endif #define arch_align_stack(x) (x) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 613b9513f8b9..c4619a428d9b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -941,6 +941,25 @@ static inline void skb_reserve(struct sk_buff *skb, int len) #define NET_IP_ALIGN 2 #endif +/* + * The networking layer reserves some headroom in skb data (via + * dev_alloc_skb). This is used to avoid having to reallocate skb data when + * the header has to grow. In the default case, if the header has to grow + * 16 bytes or less we avoid the reallocation. + * + * Unfortunately this headroom changes the DMA alignment of the resulting + * network packet. As for NET_IP_ALIGN, this unaligned DMA is expensive + * on some architectures. An architecture can override this value, + * perhaps setting it to a cacheline in size (since that will maintain + * cacheline alignment of the DMA). It must be a power of 2. + * + * Various parts of the networking layer expect at least 16 bytes of + * headroom, you should not reduce this. + */ +#ifndef NET_SKB_PAD +#define NET_SKB_PAD 16 +#endif + extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc); static inline void __skb_trim(struct sk_buff *skb, unsigned int len) @@ -1030,9 +1049,9 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) static inline struct sk_buff *__dev_alloc_skb(unsigned int length, gfp_t gfp_mask) { - struct sk_buff *skb = alloc_skb(length + 16, gfp_mask); + struct sk_buff *skb = alloc_skb(length + NET_SKB_PAD, gfp_mask); if (likely(skb)) - skb_reserve(skb, 16); + skb_reserve(skb, NET_SKB_PAD); return skb; } #else @@ -1070,13 +1089,15 @@ static inline struct sk_buff *dev_alloc_skb(unsigned int length) */ static inline int skb_cow(struct sk_buff *skb, unsigned int headroom) { - int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb); + int delta = (headroom > NET_SKB_PAD ? headroom : NET_SKB_PAD) - + skb_headroom(skb); if (delta < 0) delta = 0; if (delta || skb_cloned(skb)) - return pskb_expand_head(skb, (delta + 15) & ~15, 0, GFP_ATOMIC); + return pskb_expand_head(skb, (delta + (NET_SKB_PAD-1)) & + ~(NET_SKB_PAD-1), 0, GFP_ATOMIC); return 0; } -- cgit v1.2.3 From 2ab9391dea6e36fed13443c29bf97d3be05f5289 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 31 Mar 2006 10:28:29 -0800 Subject: [IA64] Avoid "u64 foo : 32;" for gcc3 vs. 
gcc4 compatibility gcc3 thinks that a 32-bit field of a u64 type is itself a u64, so should be printed with "%ld". gcc4 thinks it needs just "%d". Make both versions happy by avoiding this construct. Signed-off-by: Tony Luck --- arch/ia64/kernel/palinfo.c | 8 ++++---- arch/ia64/kernel/time.c | 2 +- include/asm-ia64/pal.h | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c index 89faa603c6be..6386f63c413e 100644 --- a/arch/ia64/kernel/palinfo.c +++ b/arch/ia64/kernel/palinfo.c @@ -240,7 +240,7 @@ cache_info(char *page) } p += sprintf(p, "%s Cache level %lu:\n" - "\tSize : %lu bytes\n" + "\tSize : %u bytes\n" "\tAttributes : ", cache_types[j+cci.pcci_unified], i+1, cci.pcci_cache_size); @@ -648,9 +648,9 @@ frequency_info(char *page) if (ia64_pal_freq_ratios(&proc, &bus, &itc) != 0) return 0; p += sprintf(p, - "Processor/Clock ratio : %ld/%ld\n" - "Bus/Clock ratio : %ld/%ld\n" - "ITC/Clock ratio : %ld/%ld\n", + "Processor/Clock ratio : %d/%d\n" + "Bus/Clock ratio : %d/%d\n" + "ITC/Clock ratio : %d/%d\n", proc.num, proc.den, bus.num, bus.den, itc.num, itc.den); return p - page; diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index ac167436e936..49958904045b 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -188,7 +188,7 @@ ia64_init_itm (void) itc_freq = (platform_base_freq*itc_ratio.num)/itc_ratio.den; local_cpu_data->itm_delta = (itc_freq + HZ/2) / HZ; - printk(KERN_DEBUG "CPU %d: base freq=%lu.%03luMHz, ITC ratio=%lu/%lu, " + printk(KERN_DEBUG "CPU %d: base freq=%lu.%03luMHz, ITC ratio=%u/%u, " "ITC freq=%lu.%03luMHz", smp_processor_id(), platform_base_freq / 1000000, (platform_base_freq / 1000) % 1000, itc_ratio.num, itc_ratio.den, itc_freq / 1000000, (itc_freq / 1000) % 1000); diff --git a/include/asm-ia64/pal.h b/include/asm-ia64/pal.h index 5d229c5953e9..37e52a2836b0 100644 --- a/include/asm-ia64/pal.h +++ b/include/asm-ia64/pal.h @@ -131,7 +131,7 @@ typedef u64 pal_cache_line_state_t; #define PAL_CACHE_LINE_STATE_MODIFIED 3 /* Modified */ typedef struct pal_freq_ratio { - u64 den : 32, num : 32; /* numerator & denominator */ + u32 den, num; /* numerator & denominator */ } itc_ratio, proc_ratio; typedef union pal_cache_config_info_1_s { @@ -152,10 +152,10 @@ typedef union pal_cache_config_info_1_s { typedef union pal_cache_config_info_2_s { struct { - u64 cache_size : 32, /*cache size in bytes*/ + u32 cache_size; /*cache size in bytes*/ - alias_boundary : 8, /* 39-32 aliased addr + u32 alias_boundary : 8, /* 39-32 aliased addr * separation for max * performance. */ -- cgit v1.2.3 From 48b192686dd20cb1576ae1d8ccd17a07971ef24a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 31 Mar 2006 02:29:41 -0800 Subject: [PATCH] sem2mutex: drivers/mtd/ Semaphore to mutex conversion. The conversion was generated via scripts, and the result was validated automatically via a script as well. 
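The substitution pattern is mechanical; a minimal sketch with a hypothetical structure (not taken from this patch) shows the shape of every conversion:

#include <linux/mutex.h>

static DEFINE_MUTEX(my_global_lock);	/* was: DECLARE_MUTEX(my_global_lock); */

struct my_dev {				/* hypothetical example structure */
	struct mutex lock;		/* was: struct semaphore lock; */
};

static void my_dev_setup(struct my_dev *dev)
{
	mutex_init(&dev->lock);		/* was: init_MUTEX(&dev->lock); */
}

static void my_dev_work(struct my_dev *dev)
{
	mutex_lock(&dev->lock);		/* was: down(&dev->lock); */
	/* ... critical section ... */
	mutex_unlock(&dev->lock);	/* was: up(&dev->lock); */
}
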
Signed-off-by: Ingo Molnar Cc: Thomas Gleixner Acked-by: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mtd/devices/blkmtd.c | 10 +++++----- drivers/mtd/devices/block2mtd.c | 13 +++++++------ drivers/mtd/devices/doc2000.c | 30 +++++++++++++++--------------- drivers/mtd/mtd_blkdevs.c | 32 ++++++++++++++++---------------- drivers/mtd/mtdblock.c | 14 ++++++++------ drivers/mtd/mtdcore.c | 32 ++++++++++++++++---------------- include/linux/mtd/blktrans.h | 4 ++-- include/linux/mtd/doc2000.h | 4 ++-- 8 files changed, 71 insertions(+), 68 deletions(-) (limited to 'include') diff --git a/drivers/mtd/devices/blkmtd.c b/drivers/mtd/devices/blkmtd.c index 04f864d238db..d732532635a1 100644 --- a/drivers/mtd/devices/blkmtd.c +++ b/drivers/mtd/devices/blkmtd.c @@ -29,7 +29,7 @@ #include #include #include - +#include #define err(format, arg...) printk(KERN_ERR "blkmtd: " format "\n" , ## arg) #define info(format, arg...) printk(KERN_INFO "blkmtd: " format "\n" , ## arg) @@ -46,7 +46,7 @@ struct blkmtd_dev { struct list_head list; struct block_device *blkdev; struct mtd_info mtd_info; - struct semaphore wrbuf_mutex; + struct mutex wrbuf_mutex; }; @@ -268,7 +268,7 @@ static int write_pages(struct blkmtd_dev *dev, const u_char *buf, loff_t to, if(end_len) pagecnt++; - down(&dev->wrbuf_mutex); + mutex_lock(&dev->wrbuf_mutex); DEBUG(3, "blkmtd: write: start_len = %zd len = %zd end_len = %zd pagecnt = %d\n", start_len, len, end_len, pagecnt); @@ -376,7 +376,7 @@ static int write_pages(struct blkmtd_dev *dev, const u_char *buf, loff_t to, blkmtd_write_out(bio); DEBUG(2, "blkmtd: write: end, retlen = %zd, err = %d\n", *retlen, err); - up(&dev->wrbuf_mutex); + mutex_unlock(&dev->wrbuf_mutex); if(retlen) *retlen = thislen; @@ -659,7 +659,7 @@ static struct blkmtd_dev *add_device(char *devname, int readonly, int erase_size memset(dev, 0, sizeof(struct blkmtd_dev)); dev->blkdev = bdev; if(!readonly) { - init_MUTEX(&dev->wrbuf_mutex); + mutex_init(&dev->wrbuf_mutex); } dev->mtd_info.size = dev->blkdev->bd_inode->i_size & PAGE_MASK; diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c index 7ff403b2a0a0..4160b8334c53 100644 --- a/drivers/mtd/devices/block2mtd.c +++ b/drivers/mtd/devices/block2mtd.c @@ -18,6 +18,7 @@ #include #include #include +#include #define VERSION "$Revision: 1.30 $" @@ -31,7 +32,7 @@ struct block2mtd_dev { struct list_head list; struct block_device *blkdev; struct mtd_info mtd; - struct semaphore write_mutex; + struct mutex write_mutex; }; @@ -134,9 +135,9 @@ static int block2mtd_erase(struct mtd_info *mtd, struct erase_info *instr) int err; instr->state = MTD_ERASING; - down(&dev->write_mutex); + mutex_lock(&dev->write_mutex); err = _block2mtd_erase(dev, from, len); - up(&dev->write_mutex); + mutex_unlock(&dev->write_mutex); if (err) { ERROR("erase failed err = %d", err); instr->state = MTD_ERASE_FAILED; @@ -249,9 +250,9 @@ static int block2mtd_write(struct mtd_info *mtd, loff_t to, size_t len, if (to + len > mtd->size) len = mtd->size - to; - down(&dev->write_mutex); + mutex_lock(&dev->write_mutex); err = _block2mtd_write(dev, buf, to, len, retlen); - up(&dev->write_mutex); + mutex_unlock(&dev->write_mutex); if (err > 0) err = 0; return err; @@ -310,7 +311,7 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size) goto devinit_err; } - init_MUTEX(&dev->write_mutex); + mutex_init(&dev->write_mutex); /* Setup the MTD structure */ /* make the name contain the block device in */ diff --git 
a/drivers/mtd/devices/doc2000.c b/drivers/mtd/devices/doc2000.c index 0f2c956a90d3..23e7a5c7d2c1 100644 --- a/drivers/mtd/devices/doc2000.c +++ b/drivers/mtd/devices/doc2000.c @@ -606,7 +606,7 @@ static void DoC2k_init(struct mtd_info *mtd) this->curfloor = -1; this->curchip = -1; - init_MUTEX(&this->lock); + mutex_init(&this->lock); /* Ident all the chips present. */ DoC_ScanChips(this, maxchips); @@ -646,7 +646,7 @@ static int doc_read_ecc(struct mtd_info *mtd, loff_t from, size_t len, if (from >= this->totlen) return -EINVAL; - down(&this->lock); + mutex_lock(&this->lock); *retlen = 0; while (left) { @@ -775,7 +775,7 @@ static int doc_read_ecc(struct mtd_info *mtd, loff_t from, size_t len, buf += len; } - up(&this->lock); + mutex_unlock(&this->lock); return ret; } @@ -804,7 +804,7 @@ static int doc_write_ecc(struct mtd_info *mtd, loff_t to, size_t len, if (to >= this->totlen) return -EINVAL; - down(&this->lock); + mutex_lock(&this->lock); *retlen = 0; while (left) { @@ -874,7 +874,7 @@ static int doc_write_ecc(struct mtd_info *mtd, loff_t to, size_t len, printk(KERN_ERR "Error programming flash\n"); /* Error in programming */ *retlen = 0; - up(&this->lock); + mutex_unlock(&this->lock); return -EIO; } @@ -936,7 +936,7 @@ static int doc_write_ecc(struct mtd_info *mtd, loff_t to, size_t len, printk(KERN_ERR "Error programming flash\n"); /* Error in programming */ *retlen = 0; - up(&this->lock); + mutex_unlock(&this->lock); return -EIO; } @@ -957,7 +957,7 @@ static int doc_write_ecc(struct mtd_info *mtd, loff_t to, size_t len, ret = doc_write_oob_nolock(mtd, to, 8, &dummy, x); if (ret) { - up(&this->lock); + mutex_unlock(&this->lock); return ret; } } @@ -967,7 +967,7 @@ static int doc_write_ecc(struct mtd_info *mtd, loff_t to, size_t len, buf += len; } - up(&this->lock); + mutex_unlock(&this->lock); return 0; } @@ -1038,7 +1038,7 @@ static int doc_read_oob(struct mtd_info *mtd, loff_t ofs, size_t len, int len256 = 0, ret; struct Nand *mychip; - down(&this->lock); + mutex_lock(&this->lock); mychip = &this->chips[ofs >> this->chipshift]; @@ -1084,7 +1084,7 @@ static int doc_read_oob(struct mtd_info *mtd, loff_t ofs, size_t len, ret = DoC_WaitReady(this); - up(&this->lock); + mutex_unlock(&this->lock); return ret; } @@ -1198,10 +1198,10 @@ static int doc_write_oob(struct mtd_info *mtd, loff_t ofs, size_t len, struct DiskOnChip *this = mtd->priv; int ret; - down(&this->lock); + mutex_lock(&this->lock); ret = doc_write_oob_nolock(mtd, ofs, len, retlen, buf); - up(&this->lock); + mutex_unlock(&this->lock); return ret; } @@ -1215,10 +1215,10 @@ static int doc_erase(struct mtd_info *mtd, struct erase_info *instr) struct Nand *mychip; int status; - down(&this->lock); + mutex_lock(&this->lock); if (ofs & (mtd->erasesize-1) || len & (mtd->erasesize-1)) { - up(&this->lock); + mutex_unlock(&this->lock); return -EINVAL; } @@ -1266,7 +1266,7 @@ static int doc_erase(struct mtd_info *mtd, struct erase_info *instr) callback: mtd_erase_callback(instr); - up(&this->lock); + mutex_unlock(&this->lock); return 0; } diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 840dd66ce2dc..458d3c8ae1ee 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -19,12 +19,12 @@ #include #include #include -#include +#include #include static LIST_HEAD(blktrans_majors); -extern struct semaphore mtd_table_mutex; +extern struct mutex mtd_table_mutex; extern struct mtd_info *mtd_table[]; struct mtd_blkcore_priv { @@ -122,9 +122,9 @@ static int mtd_blktrans_thread(void *arg) 
spin_unlock_irq(rq->queue_lock); - down(&dev->sem); + mutex_lock(&dev->lock); res = do_blktrans_request(tr, dev, req); - up(&dev->sem); + mutex_unlock(&dev->lock); spin_lock_irq(rq->queue_lock); @@ -235,8 +235,8 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) int last_devnum = -1; struct gendisk *gd; - if (!down_trylock(&mtd_table_mutex)) { - up(&mtd_table_mutex); + if (!!mutex_trylock(&mtd_table_mutex)) { + mutex_unlock(&mtd_table_mutex); BUG(); } @@ -267,7 +267,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) return -EBUSY; } - init_MUTEX(&new->sem); + mutex_init(&new->lock); list_add_tail(&new->list, &tr->devs); added: if (!tr->writesect) @@ -313,8 +313,8 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) int del_mtd_blktrans_dev(struct mtd_blktrans_dev *old) { - if (!down_trylock(&mtd_table_mutex)) { - up(&mtd_table_mutex); + if (!!mutex_trylock(&mtd_table_mutex)) { + mutex_unlock(&mtd_table_mutex); BUG(); } @@ -378,14 +378,14 @@ int register_mtd_blktrans(struct mtd_blktrans_ops *tr) memset(tr->blkcore_priv, 0, sizeof(*tr->blkcore_priv)); - down(&mtd_table_mutex); + mutex_lock(&mtd_table_mutex); ret = register_blkdev(tr->major, tr->name); if (ret) { printk(KERN_WARNING "Unable to register %s block device on major %d: %d\n", tr->name, tr->major, ret); kfree(tr->blkcore_priv); - up(&mtd_table_mutex); + mutex_unlock(&mtd_table_mutex); return ret; } spin_lock_init(&tr->blkcore_priv->queue_lock); @@ -396,7 +396,7 @@ int register_mtd_blktrans(struct mtd_blktrans_ops *tr) if (!tr->blkcore_priv->rq) { unregister_blkdev(tr->major, tr->name); kfree(tr->blkcore_priv); - up(&mtd_table_mutex); + mutex_unlock(&mtd_table_mutex); return -ENOMEM; } @@ -407,7 +407,7 @@ int register_mtd_blktrans(struct mtd_blktrans_ops *tr) blk_cleanup_queue(tr->blkcore_priv->rq); unregister_blkdev(tr->major, tr->name); kfree(tr->blkcore_priv); - up(&mtd_table_mutex); + mutex_unlock(&mtd_table_mutex); return ret; } @@ -419,7 +419,7 @@ int register_mtd_blktrans(struct mtd_blktrans_ops *tr) tr->add_mtd(tr, mtd_table[i]); } - up(&mtd_table_mutex); + mutex_unlock(&mtd_table_mutex); return 0; } @@ -428,7 +428,7 @@ int deregister_mtd_blktrans(struct mtd_blktrans_ops *tr) { struct list_head *this, *next; - down(&mtd_table_mutex); + mutex_lock(&mtd_table_mutex); /* Clean up the kernel thread */ tr->blkcore_priv->exiting = 1; @@ -446,7 +446,7 @@ int deregister_mtd_blktrans(struct mtd_blktrans_ops *tr) blk_cleanup_queue(tr->blkcore_priv->rq); unregister_blkdev(tr->major, tr->name); - up(&mtd_table_mutex); + mutex_unlock(&mtd_table_mutex); kfree(tr->blkcore_priv); diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c index e84756644fd1..2cef280e388c 100644 --- a/drivers/mtd/mtdblock.c +++ b/drivers/mtd/mtdblock.c @@ -19,11 +19,13 @@ #include #include +#include + static struct mtdblk_dev { struct mtd_info *mtd; int count; - struct semaphore cache_sem; + struct mutex cache_mutex; unsigned char *cache_data; unsigned long cache_offset; unsigned int cache_size; @@ -284,7 +286,7 @@ static int mtdblock_open(struct mtd_blktrans_dev *mbd) mtdblk->count = 1; mtdblk->mtd = mtd; - init_MUTEX (&mtdblk->cache_sem); + mutex_init(&mtdblk->cache_mutex); mtdblk->cache_state = STATE_EMPTY; if ((mtdblk->mtd->flags & MTD_CAP_RAM) != MTD_CAP_RAM && mtdblk->mtd->erasesize) { @@ -306,9 +308,9 @@ static int mtdblock_release(struct mtd_blktrans_dev *mbd) DEBUG(MTD_DEBUG_LEVEL1, "mtdblock_release\n"); - down(&mtdblk->cache_sem); + mutex_lock(&mtdblk->cache_mutex); write_cached_data(mtdblk); - up(&mtdblk->cache_sem); + 
mutex_unlock(&mtdblk->cache_mutex); if (!--mtdblk->count) { /* It was the last usage. Free the device */ @@ -327,9 +329,9 @@ static int mtdblock_flush(struct mtd_blktrans_dev *dev) { struct mtdblk_dev *mtdblk = mtdblks[dev->devnum]; - down(&mtdblk->cache_sem); + mutex_lock(&mtdblk->cache_mutex); write_cached_data(mtdblk); - up(&mtdblk->cache_sem); + mutex_unlock(&mtdblk->cache_mutex); if (mtdblk->mtd->sync) mtdblk->mtd->sync(mtdblk->mtd); diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index dade02ab0687..9c3813486a1a 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -27,7 +27,7 @@ /* These are exported solely for the purpose of mtd_blkdevs.c. You should not use them for _anything_ else */ -DECLARE_MUTEX(mtd_table_mutex); +DEFINE_MUTEX(mtd_table_mutex); struct mtd_info *mtd_table[MAX_MTD_DEVICES]; EXPORT_SYMBOL_GPL(mtd_table_mutex); @@ -49,7 +49,7 @@ int add_mtd_device(struct mtd_info *mtd) { int i; - down(&mtd_table_mutex); + mutex_lock(&mtd_table_mutex); for (i=0; i < MAX_MTD_DEVICES; i++) if (!mtd_table[i]) { @@ -67,7 +67,7 @@ int add_mtd_device(struct mtd_info *mtd) not->add(mtd); } - up(&mtd_table_mutex); + mutex_unlock(&mtd_table_mutex); /* We _know_ we aren't being removed, because our caller is still holding us here. So none of this try_ nonsense, and no bitching about it @@ -76,7 +76,7 @@ int add_mtd_device(struct mtd_info *mtd) return 0; } - up(&mtd_table_mutex); + mutex_unlock(&mtd_table_mutex); return 1; } @@ -94,7 +94,7 @@ int del_mtd_device (struct mtd_info *mtd) { int ret; - down(&mtd_table_mutex); + mutex_lock(&mtd_table_mutex); if (mtd_table[mtd->index] != mtd) { ret = -ENODEV; @@ -118,7 +118,7 @@ int del_mtd_device (struct mtd_info *mtd) ret = 0; } - up(&mtd_table_mutex); + mutex_unlock(&mtd_table_mutex); return ret; } @@ -135,7 +135,7 @@ void register_mtd_user (struct mtd_notifier *new) { int i; - down(&mtd_table_mutex); + mutex_lock(&mtd_table_mutex); list_add(&new->list, &mtd_notifiers); @@ -145,7 +145,7 @@ void register_mtd_user (struct mtd_notifier *new) if (mtd_table[i]) new->add(mtd_table[i]); - up(&mtd_table_mutex); + mutex_unlock(&mtd_table_mutex); } /** @@ -162,7 +162,7 @@ int unregister_mtd_user (struct mtd_notifier *old) { int i; - down(&mtd_table_mutex); + mutex_lock(&mtd_table_mutex); module_put(THIS_MODULE); @@ -171,7 +171,7 @@ int unregister_mtd_user (struct mtd_notifier *old) old->remove(mtd_table[i]); list_del(&old->list); - up(&mtd_table_mutex); + mutex_unlock(&mtd_table_mutex); return 0; } @@ -193,7 +193,7 @@ struct mtd_info *get_mtd_device(struct mtd_info *mtd, int num) struct mtd_info *ret = NULL; int i; - down(&mtd_table_mutex); + mutex_lock(&mtd_table_mutex); if (num == -1) { for (i=0; i< MAX_MTD_DEVICES; i++) @@ -211,7 +211,7 @@ struct mtd_info *get_mtd_device(struct mtd_info *mtd, int num) if (ret) ret->usecount++; - up(&mtd_table_mutex); + mutex_unlock(&mtd_table_mutex); return ret; } @@ -219,9 +219,9 @@ void put_mtd_device(struct mtd_info *mtd) { int c; - down(&mtd_table_mutex); + mutex_lock(&mtd_table_mutex); c = --mtd->usecount; - up(&mtd_table_mutex); + mutex_unlock(&mtd_table_mutex); BUG_ON(c < 0); module_put(mtd->owner); @@ -319,7 +319,7 @@ static int mtd_read_proc (char *page, char **start, off_t off, int count, int len, l, i; off_t begin = 0; - down(&mtd_table_mutex); + mutex_lock(&mtd_table_mutex); len = sprintf(page, "dev: size erasesize name\n"); for (i=0; i< MAX_MTD_DEVICES; i++) { @@ -337,7 +337,7 @@ static int mtd_read_proc (char *page, char **start, off_t off, int count, *eof = 1; done: - 
up(&mtd_table_mutex); + mutex_unlock(&mtd_table_mutex); if (off >= len+begin) return 0; *start = page + (off-begin); diff --git a/include/linux/mtd/blktrans.h b/include/linux/mtd/blktrans.h index f46afec6fbf8..72fc68c5ee96 100644 --- a/include/linux/mtd/blktrans.h +++ b/include/linux/mtd/blktrans.h @@ -10,7 +10,7 @@ #ifndef __MTD_TRANS_H__ #define __MTD_TRANS_H__ -#include +#include struct hd_geometry; struct mtd_info; @@ -22,7 +22,7 @@ struct mtd_blktrans_dev { struct mtd_blktrans_ops *tr; struct list_head list; struct mtd_info *mtd; - struct semaphore sem; + struct mutex lock; int devnum; int blksize; unsigned long size; diff --git a/include/linux/mtd/doc2000.h b/include/linux/mtd/doc2000.h index 386a52cf8b1b..9addd073bf15 100644 --- a/include/linux/mtd/doc2000.h +++ b/include/linux/mtd/doc2000.h @@ -15,7 +15,7 @@ #define __MTD_DOC2000_H__ #include -#include +#include #define DoC_Sig1 0 #define DoC_Sig2 1 @@ -187,7 +187,7 @@ struct DiskOnChip { int numchips; struct Nand *chips; struct mtd_info *nextdoc; - struct semaphore lock; + struct mutex lock; }; int doc_decode_ecc(unsigned char sector[512], unsigned char ecc1[6]); -- cgit v1.2.3 From 0500abf52109d09bf60d740dec2e41d6cf265688 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Fri, 31 Mar 2006 02:29:42 -0800 Subject: [PATCH] drivers/mtd/: small cleanups - chips/sharp.c: make two needlessly global functions static - move some declarations to a header file where they belong to Signed-off-by: Adrian Bunk Acked-by: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mtd/chips/sharp.c | 7 +++---- drivers/mtd/inftlcore.c | 5 ----- include/linux/mtd/inftl.h | 5 +++++ 3 files changed, 8 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/mtd/chips/sharp.c b/drivers/mtd/chips/sharp.c index 36f61a6a766e..3cc0b23c5865 100644 --- a/drivers/mtd/chips/sharp.c +++ b/drivers/mtd/chips/sharp.c @@ -64,7 +64,7 @@ #undef AUTOUNLOCK /* automatically unlocks blocks before erasing */ -struct mtd_info *sharp_probe(struct map_info *); +static struct mtd_info *sharp_probe(struct map_info *); static int sharp_probe_map(struct map_info *map,struct mtd_info *mtd); @@ -96,7 +96,6 @@ struct sharp_info{ struct flchip chips[1]; }; -struct mtd_info *sharp_probe(struct map_info *map); static void sharp_destroy(struct mtd_info *mtd); static struct mtd_chip_driver sharp_chipdrv = { @@ -107,7 +106,7 @@ static struct mtd_chip_driver sharp_chipdrv = { }; -struct mtd_info *sharp_probe(struct map_info *map) +static struct mtd_info *sharp_probe(struct map_info *map) { struct mtd_info *mtd = NULL; struct sharp_info *sharp = NULL; @@ -581,7 +580,7 @@ static void sharp_destroy(struct mtd_info *mtd) } -int __init sharp_probe_init(void) +static int __init sharp_probe_init(void) { printk("MTD Sharp chip driver \n"); diff --git a/drivers/mtd/inftlcore.c b/drivers/mtd/inftlcore.c index 8a544890173d..abb76aa6326a 100644 --- a/drivers/mtd/inftlcore.c +++ b/drivers/mtd/inftlcore.c @@ -47,9 +47,6 @@ */ #define MAX_LOOPS 10000 -extern void INFTL_dumptables(struct INFTLrecord *inftl); -extern void INFTL_dumpVUchains(struct INFTLrecord *inftl); - static void inftl_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd) { struct INFTLrecord *inftl; @@ -885,8 +882,6 @@ static struct mtd_blktrans_ops inftl_tr = { .owner = THIS_MODULE, }; -extern char inftlmountrev[]; - static int __init init_inftl(void) { printk(KERN_INFO "INFTL: inftlcore.c $Revision: 1.19 $, " diff --git a/include/linux/mtd/inftl.h b/include/linux/mtd/inftl.h index 
0268125a6271..d7eaa40e5ab0 100644 --- a/include/linux/mtd/inftl.h +++ b/include/linux/mtd/inftl.h @@ -52,6 +52,11 @@ struct INFTLrecord { int INFTL_mount(struct INFTLrecord *s); int INFTL_formatblock(struct INFTLrecord *s, int block); +extern char inftlmountrev[]; + +void INFTL_dumptables(struct INFTLrecord *s); +void INFTL_dumpVUchains(struct INFTLrecord *s); + #endif /* __KERNEL__ */ #endif /* __MTD_INFTL_H__ */ -- cgit v1.2.3 From 9bf9e89c3d147ca8cf9622d2d053684fba77a464 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 31 Mar 2006 02:29:56 -0800 Subject: [PATCH] migrate_pages_to() must be defined for the no swap case Fix migrate_pages_to() definition. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 7d09962c3c0b..ff0a64073ebc 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -12,7 +12,7 @@ extern void migrate_page_copy(struct page *, struct page *); extern int migrate_page_remove_references(struct page *, struct page *, int); extern int migrate_pages(struct list_head *l, struct list_head *t, struct list_head *moved, struct list_head *failed); -int migrate_pages_to(struct list_head *pagelist, +extern int migrate_pages_to(struct list_head *pagelist, struct vm_area_struct *vma, int dest); extern int fail_migrate_page(struct page *, struct page *); @@ -26,6 +26,9 @@ static inline int putback_lru_pages(struct list_head *l) { return 0; } static inline int migrate_pages(struct list_head *l, struct list_head *t, struct list_head *moved, struct list_head *failed) { return -ENOSYS; } +static inline int migrate_pages_to(struct list_head *pagelist, + struct vm_area_struct *vma, int dest) { return 0; } + static inline int migrate_prep(void) { return -ENOSYS; } /* Possible settings for the migrate_page() method in address_operations */ -- cgit v1.2.3 From 93fac7041f082297b93655a0e49f659cd7520e40 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 31 Mar 2006 02:29:56 -0800 Subject: [PATCH] mm: schedule find_trylock_page() removal find_trylock_page() is an odd interface in that it doesn't take a reference like the others. Now that XFS no longer uses it, and its last remaining caller actually wants an elevated refcount, opencode that callsite and schedule find_trylock_page() for removal. Signed-off-by: Nick Piggin Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/feature-removal-schedule.txt | 12 ++++++++++++ include/linux/pagemap.h | 4 ++-- mm/swapfile.c | 14 ++++++++++---- 3 files changed, 24 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 495858b236b6..a92b10bb0b03 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -241,3 +241,15 @@ Why: The USB subsystem has changed a lot over time, and it has been Who: Greg Kroah-Hartman --------------------------- + +What: find_trylock_page +When: January 2007 +Why: The interface no longer has any callers left in the kernel. It + is an odd interface (compared with other find_*_page functions), in + that it does not take a refcount to the page, only the page lock. + It should be replaced with find_get_page or find_lock_page if possible. 
+ This feature removal can be reevaluated if users of the interface + cannot cleanly use something else. +Who: Nick Piggin + +--------------------------- diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 839f0b3c23aa..9539efd4f7e6 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -72,8 +72,8 @@ extern struct page * find_get_page(struct address_space *mapping, unsigned long index); extern struct page * find_lock_page(struct address_space *mapping, unsigned long index); -extern struct page * find_trylock_page(struct address_space *mapping, - unsigned long index); +extern __deprecated_for_modules struct page * find_trylock_page( + struct address_space *mapping, unsigned long index); extern struct page * find_or_create_page(struct address_space *mapping, unsigned long index, gfp_t gfp_mask); unsigned find_get_pages(struct address_space *mapping, pgoff_t start, diff --git a/mm/swapfile.c b/mm/swapfile.c index 39aa9d129612..e5fd5385f0cc 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -397,18 +397,24 @@ void free_swap_and_cache(swp_entry_t entry) p = swap_info_get(entry); if (p) { - if (swap_entry_free(p, swp_offset(entry)) == 1) - page = find_trylock_page(&swapper_space, entry.val); + if (swap_entry_free(p, swp_offset(entry)) == 1) { + page = find_get_page(&swapper_space, entry.val); + if (page && unlikely(TestSetPageLocked(page))) { + page_cache_release(page); + page = NULL; + } + } spin_unlock(&swap_lock); } if (page) { int one_user; BUG_ON(PagePrivate(page)); - page_cache_get(page); one_user = (page_count(page) == 2); /* Only cache user (+us), or swap space full? Free it! */ - if (!PageWriteback(page) && (one_user || vm_swap_full())) { + /* Also recheck PageSwapCache after page is locked (above) */ + if (PageSwapCache(page) && !PageWriteback(page) && + (one_user || vm_swap_full())) { delete_from_swap_cache(page); SetPageDirty(page); } -- cgit v1.2.3 From 3ccfb81e871b45e4af6ebb3282f3cfa0f98f1b80 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 31 Mar 2006 02:30:04 -0800 Subject: [PATCH] Remove long dead i386 floppy asm code It's been disabled since v2.1.88 Signed-off-by: Brian Gerst Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-i386/floppy.h | 34 ---------------------------------- 1 file changed, 34 deletions(-) (limited to 'include') diff --git a/include/asm-i386/floppy.h b/include/asm-i386/floppy.h index 79727afb94c9..03403045c182 100644 --- a/include/asm-i386/floppy.h +++ b/include/asm-i386/floppy.h @@ -56,7 +56,6 @@ static irqreturn_t floppy_hardint(int irq, void *dev_id, struct pt_regs * regs) register unsigned char st; #undef TRACE_FLPY_INT -#define NO_FLOPPY_ASSEMBLER #ifdef TRACE_FLPY_INT static int calls=0; @@ -71,38 +70,6 @@ static irqreturn_t floppy_hardint(int irq, void *dev_id, struct pt_regs * regs) bytes = virtual_dma_count; #endif -#ifndef NO_FLOPPY_ASSEMBLER - __asm__ ( - "testl %1,%1" - "je 3f" -"1: inb %w4,%b0" - "andb $160,%b0" - "cmpb $160,%b0" - "jne 2f" - "incw %w4" - "testl %3,%3" - "jne 4f" - "inb %w4,%b0" - "movb %0,(%2)" - "jmp 5f" -"4: movb (%2),%0" - "outb %b0,%w4" -"5: decw %w4" - "outb %0,$0x80" - "decl %1" - "incl %2" - "testl %1,%1" - "jne 1b" -"3: inb %w4,%b0" -"2: " - : "=a" ((char) st), - "=c" ((long) virtual_dma_count), - "=S" ((long) virtual_dma_addr) - : "b" ((long) virtual_dma_mode), - "d" ((short) virtual_dma_port+4), - "1" ((long) virtual_dma_count), - "2" ((long) virtual_dma_addr)); -#else { register int lcount; register char *lptr; @@ -122,7 +89,6 @@ static irqreturn_t 
floppy_hardint(int irq, void *dev_id, struct pt_regs * regs) virtual_dma_addr = lptr; st = inb(virtual_dma_port+4); } -#endif #ifdef TRACE_FLPY_INT calls++; -- cgit v1.2.3 From 1a75a3f0680d9c4bc4761512658b6fd664032e18 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 31 Mar 2006 02:30:05 -0800 Subject: [PATCH] i386 kdump timer vector lockup fix Porting the patch I posted for x86_64 to i386. http://marc.theaimsgroup.com/?l=linux-kernel&m=114178139610707&w=2 o While using kdump, after a system crash when second kernel boots, timer vector gets (0x31) locked and CPU does not see timer interrupts travelling from IOAPIC to APIC. Currently it does not lead to boot failure in second kernel as timer interrupts continues to come as ExtInt through LAPIC directly, but fixing it is good in case some boards do not support the other mode. o After a system crash, it is not safe to service interrupts any more, hence interrupts are disabled. This leads to pending interrupts at LAPIC. LAPIC sends these interrupts to the CPU during early boot of second kernel. Other pending interrupts are discarded saying unexpected trap but timer interrupt is serviced and CPU does not issue an LAPIC EOI because it think this interrupt came from i8259 and sends ack to 8259. This leads to vector 0x31 locking as LAPIC does not clear respective ISR and keeps on waiting for EOI. o This patch issues extra EOI for the pending interrupts who have ISR set. o Though today only timer seems to be the special case because in early boot it thinks interrupts are coming from i8259 and uses mask_and_ack_8259A() as ack handler and does not issue LAPIC EOI. But probably doing it in generic manner for all vectors makes sense. Signed-off-by: Vivek Goyal Cc: "Eric W. Biederman" Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/apic.c | 20 ++++++++++++++++++++ include/asm-i386/apicdef.h | 1 + 2 files changed, 21 insertions(+) (limited to 'include') diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index eb5279d23b7f..3fff3c62d57a 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -415,6 +415,7 @@ void __init init_bsp_APIC(void) void __devinit setup_local_APIC(void) { unsigned long oldvalue, value, ver, maxlvt; + int i, j; /* Pound the ESR really hard over the head with a big hammer - mbligh */ if (esr_disable) { @@ -451,6 +452,25 @@ void __devinit setup_local_APIC(void) value &= ~APIC_TPRI_MASK; apic_write_around(APIC_TASKPRI, value); + /* + * After a crash, we no longer service the interrupts and a pending + * interrupt from previous kernel might still have ISR bit set. + * + * Most probably by now CPU has serviced that pending interrupt and + * it might not have done the ack_APIC_irq() because it thought, + * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it + * does not clear the ISR bit and cpu thinks it has already serivced + * the interrupt. Hence a vector might get locked. It was noticed + * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. 
+ */ + for (i = APIC_ISR_NR - 1; i >= 0; i--) { + value = apic_read(APIC_ISR + i*0x10); + for (j = 31; j >= 0; j--) { + if (value & (1<<j)) + ack_APIC_irq(); + } + } [...] -- cgit v1.2.3 From: Al Viro Date: Fri, 31 Mar 2006 02:30:15 -0800 Subject: [PATCH] uml: sparse cleanups misc sparse annotations Signed-off-by: Al Viro Signed-off-by: Jeff Dike Cc: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/drivers/harddog_kern.c | 8 +++--- arch/um/drivers/hostaudio_kern.c | 10 +++---- arch/um/drivers/slirp_kern.c | 2 +- arch/um/include/line.h | 18 +++++-------- arch/um/include/sysdep-i386/checksum.h | 5 ++-- arch/um/kernel/exec_kern.c | 4 +-- arch/um/kernel/process_kern.c | 2 +- arch/um/kernel/ptrace.c | 34 +++++++++++------------- arch/um/kernel/syscall_kern.c | 4 +-- arch/um/kernel/trap_kern.c | 8 +++--- arch/um/sys-i386/ptrace.c | 28 ++++++++++---------- arch/um/sys-i386/signal.c | 48 ++++++++++++++++++---------------- arch/um/sys-i386/syscalls.c | 2 +- include/asm-um/ptrace-generic.h | 2 +- include/asm-um/thread_info.h | 16 ++++++------ include/asm-um/uaccess.h | 2 +- 16 files changed, 93 insertions(+), 100 deletions(-) (limited to 'include') diff --git a/arch/um/drivers/harddog_kern.c b/arch/um/drivers/harddog_kern.c index 49acb2badf32..d18a974735e6 100644 --- a/arch/um/drivers/harddog_kern.c +++ b/arch/um/drivers/harddog_kern.c @@ -104,7 +104,7 @@ static int harddog_release(struct inode *inode, struct file *file) extern int ping_watchdog(int fd); -static ssize_t harddog_write(struct file *file, const char *data, size_t len, +static ssize_t harddog_write(struct file *file, const char __user *data, size_t len, loff_t *ppos) { /* @@ -118,6 +118,7 @@ static ssize_t harddog_write(struct file *file, const char *data, size_t len, static int harddog_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { + void __user *argp= (void __user *)arg; static struct watchdog_info ident = { WDIOC_SETTIMEOUT, 0, @@ -127,13 +128,12 @@ static int harddog_ioctl(struct inode *inode, struct file *file, default: return -ENOTTY; case WDIOC_GETSUPPORT: - if(copy_to_user((struct harddog_info *)arg, &ident, - sizeof(ident))) + if(copy_to_user(argp, &ident, sizeof(ident))) return -EFAULT; return 0; case WDIOC_GETSTATUS: case WDIOC_GETBOOTSTATUS: - return put_user(0,(int *)arg); + return put_user(0,(int __user *)argp); case WDIOC_KEEPALIVE: return(ping_watchdog(harddog_out_fd)); } diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c index 59602b81b240..37232f908cd7 100644 --- a/arch/um/drivers/hostaudio_kern.c +++ b/arch/um/drivers/hostaudio_kern.c @@ -67,8 +67,8 @@ MODULE_PARM_DESC(mixer, MIXER_HELP); /* /dev/dsp file operations */ -static ssize_t hostaudio_read(struct file *file, char *buffer, size_t count, - loff_t *ppos) +static ssize_t hostaudio_read(struct file *file, char __user *buffer, + size_t count, loff_t *ppos) { struct hostaudio_state *state = file->private_data; void *kbuf; @@ -94,7 +94,7 @@ static ssize_t hostaudio_read(struct file *file, char *buffer, size_t count, return(err); } -static ssize_t hostaudio_write(struct file *file, const char *buffer, +static ssize_t hostaudio_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { struct hostaudio_state *state = file->private_data; @@ -152,7 +152,7 @@ static int hostaudio_ioctl(struct inode *inode, struct file *file, case SNDCTL_DSP_CHANNELS: case SNDCTL_DSP_SUBDIVIDE: case SNDCTL_DSP_SETFRAGMENT: - if(get_user(data, (int *) arg)) + if(get_user(data, (int __user *) arg)) 
return(-EFAULT); break; default: @@ -168,7 +168,7 @@ static int hostaudio_ioctl(struct inode *inode, struct file *file, case SNDCTL_DSP_CHANNELS: case SNDCTL_DSP_SUBDIVIDE: case SNDCTL_DSP_SETFRAGMENT: - if(put_user(data, (int *) arg)) + if(put_user(data, (int __user *) arg)) return(-EFAULT); break; default: diff --git a/arch/um/drivers/slirp_kern.c b/arch/um/drivers/slirp_kern.c index 109c541e78f2..95e50c943e14 100644 --- a/arch/um/drivers/slirp_kern.c +++ b/arch/um/drivers/slirp_kern.c @@ -77,7 +77,7 @@ static int slirp_setup(char *str, char **mac_out, void *data) int i=0; *init = ((struct slirp_init) - { argw : { { "slirp", NULL } } }); + { .argw = { { "slirp", NULL } } }); str = split_if_spec(str, mac_out, NULL); diff --git a/arch/um/include/line.h b/arch/um/include/line.h index 6f4d680dc1d4..6ac0f8252e21 100644 --- a/arch/um/include/line.h +++ b/arch/um/include/line.h @@ -58,23 +58,17 @@ struct line { }; #define LINE_INIT(str, d) \ - { init_str : str, \ - init_pri : INIT_STATIC, \ - valid : 1, \ - throttled : 0, \ - lock : SPIN_LOCK_UNLOCKED, \ - buffer : NULL, \ - head : NULL, \ - tail : NULL, \ - sigio : 0, \ - driver : d, \ - have_irq : 0 } + { .init_str = str, \ + .init_pri = INIT_STATIC, \ + .valid = 1, \ + .lock = SPIN_LOCK_UNLOCKED, \ + .driver = d } struct lines { int num; }; -#define LINES_INIT(n) { num : n } +#define LINES_INIT(n) { .num = n } extern void line_close(struct tty_struct *tty, struct file * filp); extern int line_open(struct line *lines, struct tty_struct *tty); diff --git a/arch/um/include/sysdep-i386/checksum.h b/arch/um/include/sysdep-i386/checksum.h index 7d3d202d7fff..052bb061a978 100644 --- a/arch/um/include/sysdep-i386/checksum.h +++ b/arch/um/include/sysdep-i386/checksum.h @@ -48,7 +48,8 @@ unsigned int csum_partial_copy_nocheck(const unsigned char *src, unsigned char * */ static __inline__ -unsigned int csum_partial_copy_from_user(const unsigned char *src, unsigned char *dst, +unsigned int csum_partial_copy_from_user(const unsigned char __user *src, + unsigned char *dst, int len, int sum, int *err_ptr) { if(copy_from_user(dst, src, len)){ @@ -192,7 +193,7 @@ static __inline__ unsigned short int csum_ipv6_magic(struct in6_addr *saddr, */ #define HAVE_CSUM_COPY_USER static __inline__ unsigned int csum_and_copy_to_user(const unsigned char *src, - unsigned char *dst, + unsigned char __user *dst, int len, int sum, int *err_ptr) { if (access_ok(VERIFY_WRITE, dst, len)){ diff --git a/arch/um/kernel/exec_kern.c b/arch/um/kernel/exec_kern.c index 1ca84319317d..f9b346e05b00 100644 --- a/arch/um/kernel/exec_kern.c +++ b/arch/um/kernel/exec_kern.c @@ -58,14 +58,14 @@ long um_execve(char *file, char __user *__user *argv, char __user *__user *env) return(err); } -long sys_execve(char *file, char __user *__user *argv, +long sys_execve(char __user *file, char __user *__user *argv, char __user *__user *env) { long error; char *filename; lock_kernel(); - filename = getname((char __user *) file); + filename = getname(file); error = PTR_ERR(filename); if (IS_ERR(filename)) goto out; error = execve1(filename, argv, env); diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c index 3113cab8675e..6922bdfb5ad5 100644 --- a/arch/um/kernel/process_kern.c +++ b/arch/um/kernel/process_kern.c @@ -407,7 +407,7 @@ static int proc_read_sysemu(char *buf, char **start, off_t offset, int size,int return strlen(buf); } -static int proc_write_sysemu(struct file *file,const char *buf, unsigned long count,void *data) +static int proc_write_sysemu(struct file *file,const 
char __user *buf, unsigned long count,void *data) { char tmp[2]; diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c index 98e09395c093..394582202ce6 100644 --- a/arch/um/kernel/ptrace.c +++ b/arch/um/kernel/ptrace.c @@ -46,6 +46,7 @@ extern int poke_user(struct task_struct * child, long addr, long data); long arch_ptrace(struct task_struct *child, long request, long addr, long data) { int i, ret; + unsigned long __user *p = (void __user *)(unsigned long)data; switch (request) { /* when I and D space are separate, these will need to be fixed. */ @@ -58,7 +59,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); if (copied != sizeof(tmp)) break; - ret = put_user(tmp, (unsigned long __user *) data); + ret = put_user(tmp, p); break; } @@ -136,15 +137,13 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) #ifdef PTRACE_GETREGS case PTRACE_GETREGS: { /* Get all gp regs from the child. */ - if (!access_ok(VERIFY_WRITE, (unsigned long *)data, - MAX_REG_OFFSET)) { + if (!access_ok(VERIFY_WRITE, p, MAX_REG_OFFSET)) { ret = -EIO; break; } for ( i = 0; i < MAX_REG_OFFSET; i += sizeof(long) ) { - __put_user(getreg(child, i), - (unsigned long __user *) data); - data += sizeof(long); + __put_user(getreg(child, i), p); + p++; } ret = 0; break; @@ -153,15 +152,14 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) #ifdef PTRACE_SETREGS case PTRACE_SETREGS: { /* Set all gp regs in the child. */ unsigned long tmp = 0; - if (!access_ok(VERIFY_READ, (unsigned *)data, - MAX_REG_OFFSET)) { + if (!access_ok(VERIFY_READ, p, MAX_REG_OFFSET)) { ret = -EIO; break; } for ( i = 0; i < MAX_REG_OFFSET; i += sizeof(long) ) { - __get_user(tmp, (unsigned long __user *) data); + __get_user(tmp, p); putreg(child, i, tmp); - data += sizeof(long); + p++; } ret = 0; break; @@ -188,13 +186,12 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; #endif case PTRACE_FAULTINFO: { - /* Take the info from thread->arch->faultinfo, - * but transfer max. sizeof(struct ptrace_faultinfo). - * On i386, ptrace_faultinfo is smaller! - */ - ret = copy_to_user((unsigned long __user *) data, - &child->thread.arch.faultinfo, - sizeof(struct ptrace_faultinfo)); + /* Take the info from thread->arch->faultinfo, + * but transfer max. sizeof(struct ptrace_faultinfo). + * On i386, ptrace_faultinfo is smaller! 
+ */ + ret = copy_to_user(p, &child->thread.arch.faultinfo, + sizeof(struct ptrace_faultinfo)); if(ret) break; break; @@ -204,8 +201,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) case PTRACE_LDT: { struct ptrace_ldt ldt; - if(copy_from_user(&ldt, (unsigned long __user *) data, - sizeof(ldt))){ + if(copy_from_user(&ldt, p, sizeof(ldt))){ ret = -EIO; break; } diff --git a/arch/um/kernel/syscall_kern.c b/arch/um/kernel/syscall_kern.c index 8e1a3501ff46..37d3978337d8 100644 --- a/arch/um/kernel/syscall_kern.c +++ b/arch/um/kernel/syscall_kern.c @@ -104,7 +104,7 @@ long sys_pipe(unsigned long __user * fildes) } -long sys_uname(struct old_utsname * name) +long sys_uname(struct old_utsname __user * name) { long err; if (!name) @@ -115,7 +115,7 @@ long sys_uname(struct old_utsname * name) return err?-EFAULT:0; } -long sys_olduname(struct oldold_utsname * name) +long sys_olduname(struct oldold_utsname __user * name) { long error; diff --git a/arch/um/kernel/trap_kern.c b/arch/um/kernel/trap_kern.c index d56046c2aba2..02f6d4d8dc3a 100644 --- a/arch/um/kernel/trap_kern.c +++ b/arch/um/kernel/trap_kern.c @@ -198,7 +198,7 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, void *sc) si.si_signo = SIGBUS; si.si_errno = 0; si.si_code = BUS_ADRERR; - si.si_addr = (void *)address; + si.si_addr = (void __user *)address; current->thread.arch.faultinfo = fi; force_sig_info(SIGBUS, &si, current); } else if (err == -ENOMEM) { @@ -207,7 +207,7 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, void *sc) } else { BUG_ON(err != -EFAULT); si.si_signo = SIGSEGV; - si.si_addr = (void *) address; + si.si_addr = (void __user *) address; current->thread.arch.faultinfo = fi; force_sig_info(SIGSEGV, &si, current); } @@ -220,8 +220,8 @@ void bad_segv(struct faultinfo fi, unsigned long ip) si.si_signo = SIGSEGV; si.si_code = SEGV_ACCERR; - si.si_addr = (void *) FAULT_ADDRESS(fi); - current->thread.arch.faultinfo = fi; + si.si_addr = (void __user *) FAULT_ADDRESS(fi); + current->thread.arch.faultinfo = fi; force_sig_info(SIGSEGV, &si, current); } diff --git a/arch/um/sys-i386/ptrace.c b/arch/um/sys-i386/ptrace.c index 8032a105949a..ff94eded93eb 100644 --- a/arch/um/sys-i386/ptrace.c +++ b/arch/um/sys-i386/ptrace.c @@ -124,22 +124,22 @@ unsigned long getreg(struct task_struct *child, int regno) int peek_user(struct task_struct *child, long addr, long data) { /* read the word at location addr in the USER area. 
*/ - unsigned long tmp; + unsigned long tmp; - if ((addr & 3) || addr < 0) - return -EIO; + if ((addr & 3) || addr < 0) + return -EIO; - tmp = 0; /* Default return condition */ - if(addr < MAX_REG_OFFSET){ - tmp = getreg(child, addr); - } - else if((addr >= offsetof(struct user, u_debugreg[0])) && - (addr <= offsetof(struct user, u_debugreg[7]))){ - addr -= offsetof(struct user, u_debugreg[0]); - addr = addr >> 2; - tmp = child->thread.arch.debugregs[addr]; - } - return put_user(tmp, (unsigned long *) data); + tmp = 0; /* Default return condition */ + if(addr < MAX_REG_OFFSET){ + tmp = getreg(child, addr); + } + else if((addr >= offsetof(struct user, u_debugreg[0])) && + (addr <= offsetof(struct user, u_debugreg[7]))){ + addr -= offsetof(struct user, u_debugreg[0]); + addr = addr >> 2; + tmp = child->thread.arch.debugregs[addr]; + } + return put_user(tmp, (unsigned long __user *) data); } struct i387_fxsave_struct { diff --git a/arch/um/sys-i386/signal.c b/arch/um/sys-i386/signal.c index 33a40f5ef0d2..f5d0e1c37ea2 100644 --- a/arch/um/sys-i386/signal.c +++ b/arch/um/sys-i386/signal.c @@ -19,7 +19,7 @@ #include "skas.h" static int copy_sc_from_user_skas(struct pt_regs *regs, - struct sigcontext *from) + struct sigcontext __user *from) { struct sigcontext sc; unsigned long fpregs[HOST_FP_SIZE]; @@ -57,7 +57,7 @@ static int copy_sc_from_user_skas(struct pt_regs *regs, return(0); } -int copy_sc_to_user_skas(struct sigcontext *to, struct _fpstate *to_fp, +int copy_sc_to_user_skas(struct sigcontext *to, struct _fpstate __user *to_fp, struct pt_regs *regs, unsigned long sp) { struct sigcontext sc; @@ -92,7 +92,7 @@ int copy_sc_to_user_skas(struct sigcontext *to, struct _fpstate *to_fp, "errno = %d\n", err); return(1); } - to_fp = (to_fp ? to_fp : (struct _fpstate *) (to + 1)); + to_fp = (to_fp ? to_fp : (struct _fpstate __user *) (to + 1)); sc.fpstate = to_fp; if(err) @@ -113,10 +113,11 @@ int copy_sc_to_user_skas(struct sigcontext *to, struct _fpstate *to_fp, * saved pointer is in the kernel, but the sigcontext is in userspace, so we * copy_to_user it. */ -int copy_sc_from_user_tt(struct sigcontext *to, struct sigcontext *from, +int copy_sc_from_user_tt(struct sigcontext *to, struct sigcontext __user *from, int fpsize) { - struct _fpstate *to_fp, *from_fp; + struct _fpstate *to_fp; + struct _fpstate __user *from_fp; unsigned long sigs; int err; @@ -131,13 +132,14 @@ int copy_sc_from_user_tt(struct sigcontext *to, struct sigcontext *from, return(err); } -int copy_sc_to_user_tt(struct sigcontext *to, struct _fpstate *fp, +int copy_sc_to_user_tt(struct sigcontext *to, struct _fpstate __user *fp, struct sigcontext *from, int fpsize, unsigned long sp) { - struct _fpstate *to_fp, *from_fp; + struct _fpstate __user *to_fp; + struct _fpstate *from_fp; int err; - to_fp = (fp ? fp : (struct _fpstate *) (to + 1)); + to_fp = (fp ? 
fp : (struct _fpstate __user *) (to + 1)); from_fp = from->fpstate; err = copy_to_user(to, from, sizeof(*to)); @@ -165,7 +167,7 @@ static int copy_sc_from_user(struct pt_regs *to, void __user *from) return(ret); } -static int copy_sc_to_user(struct sigcontext *to, struct _fpstate *fp, +static int copy_sc_to_user(struct sigcontext *to, struct _fpstate __user *fp, struct pt_regs *from, unsigned long sp) { return(CHOOSE_MODE(copy_sc_to_user_tt(to, fp, UPT_SC(&from->regs), @@ -173,7 +175,7 @@ static int copy_sc_to_user(struct sigcontext *to, struct _fpstate *fp, copy_sc_to_user_skas(to, fp, from, sp))); } -static int copy_ucontext_to_user(struct ucontext *uc, struct _fpstate *fp, +static int copy_ucontext_to_user(struct ucontext __user *uc, struct _fpstate __user *fp, sigset_t *set, unsigned long sp) { int err = 0; @@ -188,7 +190,7 @@ static int copy_ucontext_to_user(struct ucontext *uc, struct _fpstate *fp, struct sigframe { - char *pretcode; + char __user *pretcode; int sig; struct sigcontext sc; struct _fpstate fpstate; @@ -198,10 +200,10 @@ struct sigframe struct rt_sigframe { - char *pretcode; + char __user *pretcode; int sig; - struct siginfo *pinfo; - void *puc; + struct siginfo __user *pinfo; + void __user *puc; struct siginfo info; struct ucontext uc; struct _fpstate fpstate; @@ -213,16 +215,16 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig, sigset_t *mask) { struct sigframe __user *frame; - void *restorer; + void __user *restorer; unsigned long save_sp = PT_REGS_SP(regs); int err = 0; stack_top &= -8UL; - frame = (struct sigframe *) stack_top - 1; + frame = (struct sigframe __user *) stack_top - 1; if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) return 1; - restorer = (void *) frame->retcode; + restorer = frame->retcode; if(ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; @@ -278,16 +280,16 @@ int setup_signal_stack_si(unsigned long stack_top, int sig, siginfo_t *info, sigset_t *mask) { struct rt_sigframe __user *frame; - void *restorer; + void __user *restorer; unsigned long save_sp = PT_REGS_SP(regs); int err = 0; stack_top &= -8UL; - frame = (struct rt_sigframe *) stack_top - 1; + frame = (struct rt_sigframe __user *) stack_top - 1; if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) return 1; - restorer = (void *) frame->retcode; + restorer = frame->retcode; if(ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; @@ -333,7 +335,7 @@ err: long sys_sigreturn(struct pt_regs regs) { unsigned long sp = PT_REGS_SP(¤t->thread.regs); - struct sigframe __user *frame = (struct sigframe *)(sp - 8); + struct sigframe __user *frame = (struct sigframe __user *)(sp - 8); sigset_t set; struct sigcontext __user *sc = &frame->sc; unsigned long __user *oldmask = &sc->oldmask; @@ -365,8 +367,8 @@ long sys_sigreturn(struct pt_regs regs) long sys_rt_sigreturn(struct pt_regs regs) { - unsigned long __user sp = PT_REGS_SP(¤t->thread.regs); - struct rt_sigframe __user *frame = (struct rt_sigframe *) (sp - 4); + unsigned long sp = PT_REGS_SP(¤t->thread.regs); + struct rt_sigframe __user *frame = (struct rt_sigframe __user *) (sp - 4); sigset_t set; struct ucontext __user *uc = &frame->uc; int sig_size = _NSIG_WORDS * sizeof(unsigned long); diff --git a/arch/um/sys-i386/syscalls.c b/arch/um/sys-i386/syscalls.c index 83e9be820a86..1845123ed124 100644 --- a/arch/um/sys-i386/syscalls.c +++ b/arch/um/sys-i386/syscalls.c @@ -104,7 +104,7 @@ long sys_ipc (uint call, int first, int second, union semun fourth; if (!ptr) return -EINVAL; - if (get_user(fourth.__pad, (void 
**) ptr)) + if (get_user(fourth.__pad, (void __user * __user *) ptr)) return -EFAULT; return sys_semctl (first, second, third, fourth); } diff --git a/include/asm-um/ptrace-generic.h b/include/asm-um/ptrace-generic.h index 46599ac44037..8c57e384cb8a 100644 --- a/include/asm-um/ptrace-generic.h +++ b/include/asm-um/ptrace-generic.h @@ -28,7 +28,7 @@ struct pt_regs { union uml_pt_regs regs; }; -#define EMPTY_REGS { regs : EMPTY_UML_PT_REGS } +#define EMPTY_REGS { .regs = EMPTY_UML_PT_REGS } #define PT_REGS_IP(r) UPT_IP(&(r)->regs) #define PT_REGS_SP(r) UPT_SP(&(r)->regs) diff --git a/include/asm-um/thread_info.h b/include/asm-um/thread_info.h index 17b6b07c4332..f166b9837c6a 100644 --- a/include/asm-um/thread_info.h +++ b/include/asm-um/thread_info.h @@ -27,14 +27,14 @@ struct thread_info { #define INIT_THREAD_INFO(tsk) \ { \ - task: &tsk, \ - exec_domain: &default_exec_domain, \ - flags: 0, \ - cpu: 0, \ - preempt_count: 1, \ - addr_limit: KERNEL_DS, \ - restart_block: { \ - fn: do_no_restart_syscall, \ + .task = &tsk, \ + .exec_domain = &default_exec_domain, \ + .flags = 0, \ + .cpu = 0, \ + .preempt_count = 1, \ + .addr_limit = KERNEL_DS, \ + .restart_block = { \ + .fn = do_no_restart_syscall, \ }, \ } diff --git a/include/asm-um/uaccess.h b/include/asm-um/uaccess.h index 4e460d6f5ac8..bea5a015f667 100644 --- a/include/asm-um/uaccess.h +++ b/include/asm-um/uaccess.h @@ -57,7 +57,7 @@ ({ \ const __typeof__((*(ptr))) __user *private_ptr = (ptr); \ (access_ok(VERIFY_READ, private_ptr, sizeof(*private_ptr)) ? \ - __get_user(x, private_ptr) : ((x) = 0, -EFAULT)); \ + __get_user(x, private_ptr) : ((x) = (__typeof__(*ptr))0, -EFAULT)); \ }) #define __put_user(x, ptr) \ -- cgit v1.2.3 From fbdf2161552a2065047e5df2dbf9ebf69d66a0e9 Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Fri, 31 Mar 2006 02:30:19 -0800 Subject: [PATCH] uml: split ldt.h in arch-independent and arch-dependant code ldt-{i386,x86_64}.h is made of two different parts - some code for parsing of LDT descriptors, which is arch-dependant, and the code to handle uml_ldt_t (an LDT block inside UML), which is mostly arch-independant (among x86 and x86_64, at least). Join the common part in a single file (ldt.h) and split the rest away (host_ldt-{i386,x86_64}.h). This is needed because processor.h, with next patches, will start including the LDT descriptor parsing macros in host_ldt.h, but it can't include ldt.h because it uses semaphores (and to define semaphores one must first include processor.h!). Signed-off-by: Paolo 'Blaisorblade' Giarrusso Acked-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/Makefile | 2 +- include/asm-um/host_ldt-i386.h | 34 +++++++++++++++++++ include/asm-um/host_ldt-x86_64.h | 38 +++++++++++++++++++++ include/asm-um/ldt-i386.h | 69 ------------------------------------- include/asm-um/ldt-x86_64.h | 73 ---------------------------------------- include/asm-um/ldt.h | 41 ++++++++++++++++++++++ 6 files changed, 114 insertions(+), 143 deletions(-) create mode 100644 include/asm-um/host_ldt-i386.h create mode 100644 include/asm-um/host_ldt-x86_64.h delete mode 100644 include/asm-um/ldt-i386.h delete mode 100644 include/asm-um/ldt-x86_64.h create mode 100644 include/asm-um/ldt.h (limited to 'include') diff --git a/arch/um/Makefile b/arch/um/Makefile index a08958a3f3c1..24790bed2054 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -20,7 +20,7 @@ core-y += $(ARCH_DIR)/kernel/ \ # Have to precede the include because the included Makefiles reference them. 
SYMLINK_HEADERS := archparam.h system.h sigcontext.h processor.h ptrace.h \ - module.h vm-flags.h elf.h ldt.h + module.h vm-flags.h elf.h host_ldt.h SYMLINK_HEADERS := $(foreach header,$(SYMLINK_HEADERS),include/asm-um/$(header)) # XXX: The "os" symlink is only used by arch/um/include/os.h, which includes diff --git a/include/asm-um/host_ldt-i386.h b/include/asm-um/host_ldt-i386.h new file mode 100644 index 000000000000..b27cb0a9dd30 --- /dev/null +++ b/include/asm-um/host_ldt-i386.h @@ -0,0 +1,34 @@ +#ifndef __ASM_HOST_LDT_I386_H +#define __ASM_HOST_LDT_I386_H + +#include "asm/arch/ldt.h" + +/* + * macros stolen from include/asm-i386/desc.h + */ +#define LDT_entry_a(info) \ + ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) + +#define LDT_entry_b(info) \ + (((info)->base_addr & 0xff000000) | \ + (((info)->base_addr & 0x00ff0000) >> 16) | \ + ((info)->limit & 0xf0000) | \ + (((info)->read_exec_only ^ 1) << 9) | \ + ((info)->contents << 10) | \ + (((info)->seg_not_present ^ 1) << 15) | \ + ((info)->seg_32bit << 22) | \ + ((info)->limit_in_pages << 23) | \ + ((info)->useable << 20) | \ + 0x7000) + +#define LDT_empty(info) (\ + (info)->base_addr == 0 && \ + (info)->limit == 0 && \ + (info)->contents == 0 && \ + (info)->read_exec_only == 1 && \ + (info)->seg_32bit == 0 && \ + (info)->limit_in_pages == 0 && \ + (info)->seg_not_present == 1 && \ + (info)->useable == 0 ) + +#endif diff --git a/include/asm-um/host_ldt-x86_64.h b/include/asm-um/host_ldt-x86_64.h new file mode 100644 index 000000000000..74a63f7d9a90 --- /dev/null +++ b/include/asm-um/host_ldt-x86_64.h @@ -0,0 +1,38 @@ +#ifndef __ASM_HOST_LDT_X86_64_H +#define __ASM_HOST_LDT_X86_64_H + +#include "asm/arch/ldt.h" + +/* + * macros stolen from include/asm-x86_64/desc.h + */ +#define LDT_entry_a(info) \ + ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) + +/* Don't allow setting of the lm bit. It is useless anyways because + * 64bit system calls require __USER_CS. 
*/ +#define LDT_entry_b(info) \ + (((info)->base_addr & 0xff000000) | \ + (((info)->base_addr & 0x00ff0000) >> 16) | \ + ((info)->limit & 0xf0000) | \ + (((info)->read_exec_only ^ 1) << 9) | \ + ((info)->contents << 10) | \ + (((info)->seg_not_present ^ 1) << 15) | \ + ((info)->seg_32bit << 22) | \ + ((info)->limit_in_pages << 23) | \ + ((info)->useable << 20) | \ + /* ((info)->lm << 21) | */ \ + 0x7000) + +#define LDT_empty(info) (\ + (info)->base_addr == 0 && \ + (info)->limit == 0 && \ + (info)->contents == 0 && \ + (info)->read_exec_only == 1 && \ + (info)->seg_32bit == 0 && \ + (info)->limit_in_pages == 0 && \ + (info)->seg_not_present == 1 && \ + (info)->useable == 0 && \ + (info)->lm == 0) + +#endif diff --git a/include/asm-um/ldt-i386.h b/include/asm-um/ldt-i386.h deleted file mode 100644 index 175722a91164..000000000000 --- a/include/asm-um/ldt-i386.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (C) 2004 Fujitsu Siemens Computers GmbH - * Licensed under the GPL - * - * Author: Bodo Stroesser - */ - -#ifndef __ASM_LDT_I386_H -#define __ASM_LDT_I386_H - -#include "asm/semaphore.h" -#include "asm/arch/ldt.h" - -struct mmu_context_skas; -extern void ldt_host_info(void); -extern long init_new_ldt(struct mmu_context_skas * to_mm, - struct mmu_context_skas * from_mm); -extern void free_ldt(struct mmu_context_skas * mm); - -#define LDT_PAGES_MAX \ - ((LDT_ENTRIES * LDT_ENTRY_SIZE)/PAGE_SIZE) -#define LDT_ENTRIES_PER_PAGE \ - (PAGE_SIZE/LDT_ENTRY_SIZE) -#define LDT_DIRECT_ENTRIES \ - ((LDT_PAGES_MAX*sizeof(void *))/LDT_ENTRY_SIZE) - -struct ldt_entry { - __u32 a; - __u32 b; -}; - -typedef struct uml_ldt { - int entry_count; - struct semaphore semaphore; - union { - struct ldt_entry * pages[LDT_PAGES_MAX]; - struct ldt_entry entries[LDT_DIRECT_ENTRIES]; - } u; -} uml_ldt_t; - -/* - * macros stolen from include/asm-i386/desc.h - */ -#define LDT_entry_a(info) \ - ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) - -#define LDT_entry_b(info) \ - (((info)->base_addr & 0xff000000) | \ - (((info)->base_addr & 0x00ff0000) >> 16) | \ - ((info)->limit & 0xf0000) | \ - (((info)->read_exec_only ^ 1) << 9) | \ - ((info)->contents << 10) | \ - (((info)->seg_not_present ^ 1) << 15) | \ - ((info)->seg_32bit << 22) | \ - ((info)->limit_in_pages << 23) | \ - ((info)->useable << 20) | \ - 0x7000) - -#define LDT_empty(info) (\ - (info)->base_addr == 0 && \ - (info)->limit == 0 && \ - (info)->contents == 0 && \ - (info)->read_exec_only == 1 && \ - (info)->seg_32bit == 0 && \ - (info)->limit_in_pages == 0 && \ - (info)->seg_not_present == 1 && \ - (info)->useable == 0 ) - -#endif diff --git a/include/asm-um/ldt-x86_64.h b/include/asm-um/ldt-x86_64.h deleted file mode 100644 index 96b35aada79a..000000000000 --- a/include/asm-um/ldt-x86_64.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (C) 2004 Fujitsu Siemens Computers GmbH - * Licensed under the GPL - * - * Author: Bodo Stroesser - */ - -#ifndef __ASM_LDT_X86_64_H -#define __ASM_LDT_X86_64_H - -#include "asm/semaphore.h" -#include "asm/arch/ldt.h" - -struct mmu_context_skas; -extern void ldt_host_info(void); -extern long init_new_ldt(struct mmu_context_skas * to_mm, - struct mmu_context_skas * from_mm); -extern void free_ldt(struct mmu_context_skas * mm); - -#define LDT_PAGES_MAX \ - ((LDT_ENTRIES * LDT_ENTRY_SIZE)/PAGE_SIZE) -#define LDT_ENTRIES_PER_PAGE \ - (PAGE_SIZE/LDT_ENTRY_SIZE) -#define LDT_DIRECT_ENTRIES \ - ((LDT_PAGES_MAX*sizeof(void *))/LDT_ENTRY_SIZE) - -struct ldt_entry { - __u32 a; - __u32 b; -}; - -typedef struct 
uml_ldt { - int entry_count; - struct semaphore semaphore; - union { - struct ldt_entry * pages[LDT_PAGES_MAX]; - struct ldt_entry entries[LDT_DIRECT_ENTRIES]; - } u; -} uml_ldt_t; - -/* - * macros stolen from include/asm-x86_64/desc.h - */ -#define LDT_entry_a(info) \ - ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) - -/* Don't allow setting of the lm bit. It is useless anyways because - * 64bit system calls require __USER_CS. */ -#define LDT_entry_b(info) \ - (((info)->base_addr & 0xff000000) | \ - (((info)->base_addr & 0x00ff0000) >> 16) | \ - ((info)->limit & 0xf0000) | \ - (((info)->read_exec_only ^ 1) << 9) | \ - ((info)->contents << 10) | \ - (((info)->seg_not_present ^ 1) << 15) | \ - ((info)->seg_32bit << 22) | \ - ((info)->limit_in_pages << 23) | \ - ((info)->useable << 20) | \ - /* ((info)->lm << 21) | */ \ - 0x7000) - -#define LDT_empty(info) (\ - (info)->base_addr == 0 && \ - (info)->limit == 0 && \ - (info)->contents == 0 && \ - (info)->read_exec_only == 1 && \ - (info)->seg_32bit == 0 && \ - (info)->limit_in_pages == 0 && \ - (info)->seg_not_present == 1 && \ - (info)->useable == 0 && \ - (info)->lm == 0) - -#endif diff --git a/include/asm-um/ldt.h b/include/asm-um/ldt.h new file mode 100644 index 000000000000..96f82a456ce6 --- /dev/null +++ b/include/asm-um/ldt.h @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2004 Fujitsu Siemens Computers GmbH + * Licensed under the GPL + * + * Author: Bodo Stroesser + */ + +#ifndef __ASM_LDT_H +#define __ASM_LDT_H + +#include "asm/semaphore.h" +#include "asm/host_ldt.h" + +struct mmu_context_skas; +extern void ldt_host_info(void); +extern long init_new_ldt(struct mmu_context_skas * to_mm, + struct mmu_context_skas * from_mm); +extern void free_ldt(struct mmu_context_skas * mm); + +#define LDT_PAGES_MAX \ + ((LDT_ENTRIES * LDT_ENTRY_SIZE)/PAGE_SIZE) +#define LDT_ENTRIES_PER_PAGE \ + (PAGE_SIZE/LDT_ENTRY_SIZE) +#define LDT_DIRECT_ENTRIES \ + ((LDT_PAGES_MAX*sizeof(void *))/LDT_ENTRY_SIZE) + +struct ldt_entry { + __u32 a; + __u32 b; +}; + +typedef struct uml_ldt { + int entry_count; + struct semaphore semaphore; + union { + struct ldt_entry * pages[LDT_PAGES_MAX]; + struct ldt_entry entries[LDT_DIRECT_ENTRIES]; + } u; +} uml_ldt_t; + +#endif -- cgit v1.2.3 From aa6758d4867cd07bd76105ade6177fe6148e559a Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Fri, 31 Mar 2006 02:30:22 -0800 Subject: [PATCH] uml: implement {get,set}_thread_area for i386 Implement sys_[gs]et_thread_area and the corresponding ptrace operations for UML. This is the main chunk, additional parts follow. This implementation is now well tested and has run reliably for some time, and we've understood all the previously existing problems. Their implementation saves the new GDT content and then forwards the call to the host when appropriate, i.e. immediately when the target process is running or on context switch otherwise (i.e. on fork and on ptrace() calls). In SKAS mode, we must switch registers on each context switch (because SKAS does not switches tls_array together with current->mm). Also, added get_cpu() locking; this has been done for SKAS mode, since TT does not need it (it does not use smp_processor_id()). 
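For orientation: the host-side half of this forwarding is simply the host kernel's PTRACE_GET_THREAD_AREA / PTRACE_SET_THREAD_AREA interface, which os_get_thread_area()/os_set_thread_area() in arch/um/os-Linux/tls.c below wrap. The stand-alone program that follows is not part of the patch; it is a minimal sketch, assuming a Linux/i386 host (or a 32-bit build on x86_64) where those ptrace requests are available, of reading one TLS slot from a stopped child the same way UML's tracing thread does. The slot number 6 (GDT_ENTRY_TLS_MIN on i386) and the fallback definition of PTRACE_GET_THREAD_AREA as 25 mirror the constants used in the patch.

#include <asm/ldt.h>            /* struct user_desc */
#include <signal.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <unistd.h>

#ifndef PTRACE_GET_THREAD_AREA
#define PTRACE_GET_THREAD_AREA 25       /* same fallback as os-Linux/tls.c below */
#endif

int main(void)
{
        struct user_desc info = { .entry_number = 6 };  /* GDT_ENTRY_TLS_MIN on i386 */
        pid_t child = fork();

        if (child == 0) {
                /* Become a tracee and stop so the parent can inspect us. */
                ptrace(PTRACE_TRACEME, 0, NULL, NULL);
                raise(SIGSTOP);
                _exit(0);
        }

        waitpid(child, NULL, 0);        /* child is now stopped */

        /* The same request os_get_thread_area()/os_set_thread_area() issue. */
        if (ptrace(PTRACE_GET_THREAD_AREA, child,
                   (void *)(long)info.entry_number, &info) < 0)
                perror("PTRACE_GET_THREAD_AREA");
        else
                printf("entry %u: base=%#x limit=%#x\n",
                       info.entry_number, info.base_addr, info.limit);

        kill(child, SIGKILL);
        waitpid(child, NULL, 0);
        return 0;
}

On a typical i386 glibc system this should print the base and limit that libc installed in the child's first TLS slot (or an empty descriptor if nothing is set there). Under SKAS, load_TLS() in arch/um/sys-i386/tls.c ends up issuing the corresponding PTRACE_SET_THREAD_AREA through os_set_thread_area() on every context switch, matching the "forwarded on context switch" behaviour described above.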
Signed-off-by: Paolo 'Blaisorblade' Giarrusso Acked-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/include/os.h | 4 + arch/um/kernel/exec_kern.c | 12 +- arch/um/kernel/process_kern.c | 20 ++- arch/um/kernel/ptrace.c | 10 ++ arch/um/kernel/skas/process_kern.c | 2 + arch/um/os-Linux/Makefile | 4 +- arch/um/os-Linux/tls.c | 77 +++++++++ arch/um/sys-i386/Makefile | 3 +- arch/um/sys-i386/ptrace.c | 2 + arch/um/sys-i386/sys_call_table.S | 2 - arch/um/sys-i386/syscalls.c | 14 +- arch/um/sys-i386/tls.c | 326 +++++++++++++++++++++++++++++++++++++ arch/um/sys-x86_64/Makefile | 3 +- arch/um/sys-x86_64/tls.c | 14 ++ include/asm-um/desc.h | 12 +- include/asm-um/processor-i386.h | 35 +++- include/asm-um/processor-x86_64.h | 9 + include/asm-um/ptrace-generic.h | 14 +- include/asm-um/ptrace-i386.h | 41 +++-- include/asm-um/ptrace-x86_64.h | 35 ++-- include/asm-um/segment.h | 2 + 21 files changed, 580 insertions(+), 61 deletions(-) create mode 100644 arch/um/os-Linux/tls.c create mode 100644 arch/um/sys-i386/tls.c create mode 100644 arch/um/sys-x86_64/tls.c (limited to 'include') diff --git a/arch/um/include/os.h b/arch/um/include/os.h index 5fb84e889b2b..e9e1db1e08d0 100644 --- a/arch/um/include/os.h +++ b/arch/um/include/os.h @@ -236,6 +236,10 @@ extern int run_helper_thread(int (*proc)(void *), void *arg, int stack_order); extern int helper_wait(int pid); + +/* tls.c */ +extern int os_set_thread_area(void *data, int pid); +extern int os_get_thread_area(void *data, int pid); /* umid.c */ extern int umid_file_name(char *name, char *buf, int len); diff --git a/arch/um/kernel/exec_kern.c b/arch/um/kernel/exec_kern.c index f9b346e05b00..c0cb627bf594 100644 --- a/arch/um/kernel/exec_kern.c +++ b/arch/um/kernel/exec_kern.c @@ -22,6 +22,7 @@ void flush_thread(void) { + arch_flush_thread(¤t->thread.arch); CHOOSE_MODE(flush_thread_tt(), flush_thread_skas()); } @@ -74,14 +75,3 @@ long sys_execve(char __user *file, char __user *__user *argv, unlock_kernel(); return(error); } - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c index ba8a52c1f7ae..f6a5a502120b 100644 --- a/arch/um/kernel/process_kern.c +++ b/arch/um/kernel/process_kern.c @@ -156,9 +156,25 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, unsigned long stack_top, struct task_struct * p, struct pt_regs *regs) { + int ret; + p->thread = (struct thread_struct) INIT_THREAD; - return(CHOOSE_MODE_PROC(copy_thread_tt, copy_thread_skas, nr, - clone_flags, sp, stack_top, p, regs)); + ret = CHOOSE_MODE_PROC(copy_thread_tt, copy_thread_skas, nr, + clone_flags, sp, stack_top, p, regs); + + if (ret || !current->thread.forking) + goto out; + + clear_flushed_tls(p); + + /* + * Set a new TLS for the child thread? 
+ */ + if (clone_flags & CLONE_SETTLS) + ret = arch_copy_tls(p); + +out: + return ret; } void initial_thread_cb(void (*proc)(void *), void *arg) diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c index 394582202ce6..60d2eda995c1 100644 --- a/arch/um/kernel/ptrace.c +++ b/arch/um/kernel/ptrace.c @@ -185,6 +185,16 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ret = set_fpxregs(data, child); break; #endif + case PTRACE_GET_THREAD_AREA: + ret = ptrace_get_thread_area(child, addr, + (struct user_desc __user *) data); + break; + + case PTRACE_SET_THREAD_AREA: + ret = ptrace_set_thread_area(child, addr, + (struct user_desc __user *) data); + break; + case PTRACE_FAULTINFO: { /* Take the info from thread->arch->faultinfo, * but transfer max. sizeof(struct ptrace_faultinfo). diff --git a/arch/um/kernel/skas/process_kern.c b/arch/um/kernel/skas/process_kern.c index 14360ac17f02..38b185370c42 100644 --- a/arch/um/kernel/skas/process_kern.c +++ b/arch/um/kernel/skas/process_kern.c @@ -111,6 +111,8 @@ int copy_thread_skas(int nr, unsigned long clone_flags, unsigned long sp, if(sp != 0) REGS_SP(p->thread.regs.regs.skas.regs) = sp; handler = fork_handler; + + arch_copy_thread(¤t->thread.arch, &p->thread.arch); } else { init_thread_registers(&p->thread.regs.regs); diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile index 9e0788cf3aec..f4bfc4c7ccac 100644 --- a/arch/um/os-Linux/Makefile +++ b/arch/um/os-Linux/Makefile @@ -4,7 +4,7 @@ # obj-y = aio.o elf_aux.o file.o helper.o irq.o main.o mem.o process.o sigio.o \ - signal.o start_up.o time.o trap.o tt.o tty.o uaccess.o umid.o \ + signal.o start_up.o time.o trap.o tt.o tty.o uaccess.o umid.o tls.o \ user_syms.o util.o drivers/ sys-$(SUBARCH)/ obj-$(CONFIG_MODE_SKAS) += skas/ @@ -12,7 +12,7 @@ obj-$(CONFIG_TTY_LOG) += tty_log.o user-objs-$(CONFIG_TTY_LOG) += tty_log.o USER_OBJS := $(user-objs-y) aio.o elf_aux.o file.o helper.o irq.o main.o mem.o \ - process.o sigio.o signal.o start_up.o time.o trap.o tt.o tty.o \ + process.o sigio.o signal.o start_up.o time.o trap.o tt.o tty.o tls.o \ uaccess.o umid.o util.o CFLAGS_user_syms.o += -DSUBARCH_$(SUBARCH) diff --git a/arch/um/os-Linux/tls.c b/arch/um/os-Linux/tls.c new file mode 100644 index 000000000000..63dfcf709377 --- /dev/null +++ b/arch/um/os-Linux/tls.c @@ -0,0 +1,77 @@ +#include +#include +#include +#include "uml-config.h" + +/* TLS support - we basically rely on the host's one.*/ + +/* In TT mode, this should be called only by the tracing thread, and makes sense + * only for PTRACE_SET_THREAD_AREA. In SKAS mode, it's used normally. 
+ * + */ + +#ifndef PTRACE_GET_THREAD_AREA +#define PTRACE_GET_THREAD_AREA 25 +#endif + +#ifndef PTRACE_SET_THREAD_AREA +#define PTRACE_SET_THREAD_AREA 26 +#endif + +int os_set_thread_area(void *data, int pid) +{ + struct user_desc *info = data; + int ret; + + ret = ptrace(PTRACE_SET_THREAD_AREA, pid, info->entry_number, + (unsigned long) info); + if (ret < 0) + ret = -errno; + return ret; +} + +#ifdef UML_CONFIG_MODE_SKAS + +int os_get_thread_area(void *data, int pid) +{ + struct user_desc *info = data; + int ret; + + ret = ptrace(PTRACE_GET_THREAD_AREA, pid, info->entry_number, + (unsigned long) info); + if (ret < 0) + ret = -errno; + return ret; +} + +#endif + +#ifdef UML_CONFIG_MODE_TT +#include "linux/unistd.h" + +_syscall1(int, get_thread_area, struct user_desc *, u_info); +_syscall1(int, set_thread_area, struct user_desc *, u_info); + +int do_set_thread_area_tt(struct user_desc *info) +{ + int ret; + + ret = set_thread_area(info); + if (ret < 0) { + ret = -errno; + } + return ret; +} + +int do_get_thread_area_tt(struct user_desc *info) +{ + int ret; + + ret = get_thread_area(info); + if (ret < 0) { + ret = -errno; + } + return ret; +} + +#endif /* UML_CONFIG_MODE_TT */ diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile index 2c06a07a1bdb..98b20b7bba4f 100644 --- a/arch/um/sys-i386/Makefile +++ b/arch/um/sys-i386/Makefile @@ -1,5 +1,6 @@ obj-y = bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \ - ptrace_user.o signal.o sigcontext.o syscalls.o sysrq.o sys_call_table.o + ptrace_user.o signal.o sigcontext.o syscalls.o sysrq.o \ + sys_call_table.o tls.o obj-$(CONFIG_MODE_SKAS) += stub.o stub_segv.o diff --git a/arch/um/sys-i386/ptrace.c b/arch/um/sys-i386/ptrace.c index 927fcd955651..6a23cc6947c3 100644 --- a/arch/um/sys-i386/ptrace.c +++ b/arch/um/sys-i386/ptrace.c @@ -18,10 +18,12 @@ void arch_switch_to_tt(struct task_struct *from, struct task_struct *to) { update_debugregs(to->thread.arch.debugregs_seq); + arch_switch_tls_tt(from, to); } void arch_switch_to_skas(struct task_struct *from, struct task_struct *to) { + arch_switch_tls_skas(from, to); } int is_syscall(unsigned long addr) diff --git a/arch/um/sys-i386/sys_call_table.S b/arch/um/sys-i386/sys_call_table.S index ad75c27afe38..1ff61474b25c 100644 --- a/arch/um/sys-i386/sys_call_table.S +++ b/arch/um/sys-i386/sys_call_table.S @@ -6,8 +6,6 @@ #define sys_vm86old sys_ni_syscall #define sys_vm86 sys_ni_syscall -#define sys_set_thread_area sys_ni_syscall -#define sys_get_thread_area sys_ni_syscall #define sys_stime um_stime #define sys_time um_time diff --git a/arch/um/sys-i386/syscalls.c b/arch/um/sys-i386/syscalls.c index 1845123ed124..749dd1bfe60f 100644 --- a/arch/um/sys-i386/syscalls.c +++ b/arch/um/sys-i386/syscalls.c @@ -61,21 +61,27 @@ long old_select(struct sel_arg_struct __user *arg) return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp); } -/* The i386 version skips reading from %esi, the fourth argument. So we must do - * this, too. +/* + * The prototype on i386 is: + * + * int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls, int * child_tidptr) + * + * and the "newtls" arg. on i386 is read by copy_thread directly from the + * register saved on the stack. 
*/ long sys_clone(unsigned long clone_flags, unsigned long newsp, - int __user *parent_tid, int unused, int __user *child_tid) + int __user *parent_tid, void *newtls, int __user *child_tid) { long ret; if (!newsp) newsp = UPT_SP(¤t->thread.regs.regs); + current->thread.forking = 1; ret = do_fork(clone_flags, newsp, ¤t->thread.regs, 0, parent_tid, child_tid); current->thread.forking = 0; - return(ret); + return ret; } /* diff --git a/arch/um/sys-i386/tls.c b/arch/um/sys-i386/tls.c new file mode 100644 index 000000000000..e3c5bc593fae --- /dev/null +++ b/arch/um/sys-i386/tls.c @@ -0,0 +1,326 @@ +/* + * Copyright (C) 2005 Paolo 'Blaisorblade' Giarrusso + * Licensed under the GPL + */ + +#include "linux/config.h" +#include "linux/kernel.h" +#include "linux/sched.h" +#include "linux/slab.h" +#include "linux/types.h" +#include "asm/uaccess.h" +#include "asm/ptrace.h" +#include "asm/segment.h" +#include "asm/smp.h" +#include "asm/desc.h" +#include "choose-mode.h" +#include "kern.h" +#include "kern_util.h" +#include "mode_kern.h" +#include "os.h" +#include "mode.h" + +#ifdef CONFIG_MODE_SKAS +#include "skas.h" +#endif + +#ifdef CONFIG_MODE_SKAS +int do_set_thread_area_skas(struct user_desc *info) +{ + int ret; + u32 cpu; + + cpu = get_cpu(); + ret = os_set_thread_area(info, userspace_pid[cpu]); + put_cpu(); + return ret; +} + +int do_get_thread_area_skas(struct user_desc *info) +{ + int ret; + u32 cpu; + + cpu = get_cpu(); + ret = os_get_thread_area(info, userspace_pid[cpu]); + put_cpu(); + return ret; +} +#endif + +/* + * sys_get_thread_area: get a yet unused TLS descriptor index. + * XXX: Consider leaving one free slot for glibc usage at first place. This must + * be done here (and by changing GDT_ENTRY_TLS_* macros) and nowhere else. + * + * Also, this must be tested when compiling in SKAS mode with dinamic linking + * and running against NPTL. + */ +static int get_free_idx(struct task_struct* task) +{ + struct thread_struct *t = &task->thread; + int idx; + + if (!t->arch.tls_array) + return GDT_ENTRY_TLS_MIN; + + for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++) + if (!t->arch.tls_array[idx].present) + return idx + GDT_ENTRY_TLS_MIN; + return -ESRCH; +} + +#define O_FORCE 1 + +static inline void clear_user_desc(struct user_desc* info) +{ + /* Postcondition: LDT_empty(info) returns true. */ + memset(info, 0, sizeof(*info)); + + /* Check the LDT_empty or the i386 sys_get_thread_area code - we obtain + * indeed an empty user_desc. + */ + info->read_exec_only = 1; + info->seg_not_present = 1; +} + +static int load_TLS(int flags, struct task_struct *to) +{ + int ret = 0; + int idx; + + for (idx = GDT_ENTRY_TLS_MIN; idx < GDT_ENTRY_TLS_MAX; idx++) { + struct uml_tls_struct* curr = &to->thread.arch.tls_array[idx - GDT_ENTRY_TLS_MIN]; + + /* Actually, now if it wasn't flushed it gets cleared and + * flushed to the host, which will clear it.*/ + if (!curr->present) { + if (!curr->flushed) { + clear_user_desc(&curr->tls); + curr->tls.entry_number = idx; + } else { + WARN_ON(!LDT_empty(&curr->tls)); + continue; + } + } + + if (!(flags & O_FORCE) && curr->flushed) + continue; + + ret = do_set_thread_area(&curr->tls); + if (ret) + goto out; + + curr->flushed = 1; + } +out: + return ret; +} + +/* Verify if we need to do a flush for the new process, i.e. if there are any + * present desc's, only if they haven't been flushed. 
+ */ +static inline int needs_TLS_update(struct task_struct *task) +{ + int i; + int ret = 0; + + for (i = GDT_ENTRY_TLS_MIN; i < GDT_ENTRY_TLS_MAX; i++) { + struct uml_tls_struct* curr = &task->thread.arch.tls_array[i - GDT_ENTRY_TLS_MIN]; + + /* Can't test curr->present, we may need to clear a descriptor + * which had a value. */ + if (curr->flushed) + continue; + ret = 1; + break; + } + return ret; +} + +/* On a newly forked process, the TLS descriptors haven't yet been flushed. So + * we mark them as such and the first switch_to will do the job. + */ +void clear_flushed_tls(struct task_struct *task) +{ + int i; + + for (i = GDT_ENTRY_TLS_MIN; i < GDT_ENTRY_TLS_MAX; i++) { + struct uml_tls_struct* curr = &task->thread.arch.tls_array[i - GDT_ENTRY_TLS_MIN]; + + /* Still correct to do this, if it wasn't present on the host it + * will remain as flushed as it was. */ + if (!curr->present) + continue; + + curr->flushed = 0; + } +} + +/* This in SKAS0 does not need to be used, since we have different host + * processes. Nor will this need to be used when we'll add support to the host + * SKAS patch. */ +int arch_switch_tls_skas(struct task_struct *from, struct task_struct *to) +{ + return load_TLS(O_FORCE, to); +} + +int arch_switch_tls_tt(struct task_struct *from, struct task_struct *to) +{ + if (needs_TLS_update(to)) + return load_TLS(0, to); + + return 0; +} + +static int set_tls_entry(struct task_struct* task, struct user_desc *info, + int idx, int flushed) +{ + struct thread_struct *t = &task->thread; + + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) + return -EINVAL; + + t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].tls = *info; + t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].present = 1; + t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].flushed = flushed; + + return 0; +} + +int arch_copy_tls(struct task_struct *new) +{ + struct user_desc info; + int idx, ret = -EFAULT; + + if (copy_from_user(&info, + (void __user *) UPT_ESI(&new->thread.regs.regs), + sizeof(info))) + goto out; + + ret = -EINVAL; + if (LDT_empty(&info)) + goto out; + + idx = info.entry_number; + + ret = set_tls_entry(new, &info, idx, 0); +out: + return ret; +} + +/* XXX: use do_get_thread_area to read the host value? I'm not at all sure! */ +static int get_tls_entry(struct task_struct* task, struct user_desc *info, int idx) +{ + struct thread_struct *t = &task->thread; + + if (!t->arch.tls_array) + goto clear; + + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) + return -EINVAL; + + if (!t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].present) + goto clear; + + *info = t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].tls; + +out: + /* Temporary debugging check, to make sure that things have been + * flushed. This could be triggered if load_TLS() failed. + */ + if (unlikely(task == current && !t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].flushed)) { + printk(KERN_ERR "get_tls_entry: task with pid %d got here " + "without flushed TLS.", current->pid); + } + + return 0; +clear: + /* When the TLS entry has not been set, the values read to user in the + * tls_array are 0 (because it's cleared at boot, see + * arch/i386/kernel/head.S:cpu_gdt_table). Emulate that. 
+ */ + clear_user_desc(info); + info->entry_number = idx; + goto out; +} + +asmlinkage int sys_set_thread_area(struct user_desc __user *user_desc) +{ + struct user_desc info; + int idx, ret; + + if (copy_from_user(&info, user_desc, sizeof(info))) + return -EFAULT; + + idx = info.entry_number; + + if (idx == -1) { + idx = get_free_idx(current); + if (idx < 0) + return idx; + info.entry_number = idx; + /* Tell the user which slot we chose for him.*/ + if (put_user(idx, &user_desc->entry_number)) + return -EFAULT; + } + + ret = CHOOSE_MODE_PROC(do_set_thread_area_tt, do_set_thread_area_skas, &info); + if (ret) + return ret; + return set_tls_entry(current, &info, idx, 1); +} + +/* + * Perform set_thread_area on behalf of the traced child. + * Note: error handling is not done on the deferred load, and this differ from + * i386. However the only possible error are caused by bugs. + */ +int ptrace_set_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc) +{ + struct user_desc info; + + if (copy_from_user(&info, user_desc, sizeof(info))) + return -EFAULT; + + return set_tls_entry(child, &info, idx, 0); +} + +asmlinkage int sys_get_thread_area(struct user_desc __user *user_desc) +{ + struct user_desc info; + int idx, ret; + + if (get_user(idx, &user_desc->entry_number)) + return -EFAULT; + + ret = get_tls_entry(current, &info, idx); + if (ret < 0) + goto out; + + if (copy_to_user(user_desc, &info, sizeof(info))) + ret = -EFAULT; + +out: + return ret; +} + +/* + * Perform get_thread_area on behalf of the traced child. + */ +int ptrace_get_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc) +{ + struct user_desc info; + int ret; + + ret = get_tls_entry(child, &info, idx); + if (ret < 0) + goto out; + + if (copy_to_user(user_desc, &info, sizeof(info))) + ret = -EFAULT; +out: + return ret; +} diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile index 1ee2ed1599be..b5fc22babddf 100644 --- a/arch/um/sys-x86_64/Makefile +++ b/arch/um/sys-x86_64/Makefile @@ -5,7 +5,8 @@ # obj-y = bugs.o delay.o fault.o ldt.o mem.o ptrace.o ptrace_user.o \ - sigcontext.o signal.o syscalls.o syscall_table.o sysrq.o ksyms.o + sigcontext.o signal.o syscalls.o syscall_table.o sysrq.o ksyms.o \ + tls.o obj-$(CONFIG_MODE_SKAS) += stub.o stub_segv.o obj-$(CONFIG_MODULES) += um_module.o diff --git a/arch/um/sys-x86_64/tls.c b/arch/um/sys-x86_64/tls.c new file mode 100644 index 000000000000..ce1bf1b81c43 --- /dev/null +++ b/arch/um/sys-x86_64/tls.c @@ -0,0 +1,14 @@ +#include "linux/sched.h" + +void debug_arch_force_load_TLS(void) +{ +} + +void clear_flushed_tls(struct task_struct *task) +{ +} + +int arch_copy_tls(struct task_struct *t) +{ + return 0; +} diff --git a/include/asm-um/desc.h b/include/asm-um/desc.h index ac1d2a20d178..4ec34a51b62c 100644 --- a/include/asm-um/desc.h +++ b/include/asm-um/desc.h @@ -1,6 +1,16 @@ #ifndef __UM_DESC_H #define __UM_DESC_H -#include "asm/arch/desc.h" +/* Taken from asm-i386/desc.h, it's the only thing we need. The rest wouldn't + * compile, and has never been used. 
*/ +#define LDT_empty(info) (\ + (info)->base_addr == 0 && \ + (info)->limit == 0 && \ + (info)->contents == 0 && \ + (info)->read_exec_only == 1 && \ + (info)->seg_32bit == 0 && \ + (info)->limit_in_pages == 0 && \ + (info)->seg_not_present == 1 && \ + (info)->useable == 0 ) #endif diff --git a/include/asm-um/processor-i386.h b/include/asm-um/processor-i386.h index 4108a579eb92..595f1c3e1e40 100644 --- a/include/asm-um/processor-i386.h +++ b/include/asm-um/processor-i386.h @@ -1,4 +1,4 @@ -/* +/* * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) * Licensed under the GPL */ @@ -6,21 +6,48 @@ #ifndef __UM_PROCESSOR_I386_H #define __UM_PROCESSOR_I386_H +#include "linux/string.h" +#include "asm/host_ldt.h" +#include "asm/segment.h" + extern int host_has_xmm; extern int host_has_cmov; /* include faultinfo structure */ #include "sysdep/faultinfo.h" +struct uml_tls_struct { + struct user_desc tls; + unsigned flushed:1; + unsigned present:1; +}; + struct arch_thread { + struct uml_tls_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; unsigned long debugregs[8]; int debugregs_seq; struct faultinfo faultinfo; }; -#define INIT_ARCH_THREAD { .debugregs = { [ 0 ... 7 ] = 0 }, \ - .debugregs_seq = 0, \ - .faultinfo = { 0, 0, 0 } } +#define INIT_ARCH_THREAD { \ + .tls_array = { [ 0 ... GDT_ENTRY_TLS_ENTRIES - 1 ] = \ + { .present = 0, .flushed = 0 } }, \ + .debugregs = { [ 0 ... 7 ] = 0 }, \ + .debugregs_seq = 0, \ + .faultinfo = { 0, 0, 0 } \ +} + +static inline void arch_flush_thread(struct arch_thread *thread) +{ + /* Clear any TLS still hanging */ + memset(&thread->tls_array, 0, sizeof(thread->tls_array)); +} + +static inline void arch_copy_thread(struct arch_thread *from, + struct arch_thread *to) +{ + memcpy(&to->tls_array, &from->tls_array, sizeof(from->tls_array)); +} #include "asm/arch/user.h" diff --git a/include/asm-um/processor-x86_64.h b/include/asm-um/processor-x86_64.h index e1e1255a1d36..10609af376c0 100644 --- a/include/asm-um/processor-x86_64.h +++ b/include/asm-um/processor-x86_64.h @@ -28,6 +28,15 @@ extern inline void rep_nop(void) .debugregs_seq = 0, \ .faultinfo = { 0, 0, 0 } } +static inline void arch_flush_thread(struct arch_thread *thread) +{ +} + +static inline void arch_copy_thread(struct arch_thread *from, + struct arch_thread *to) +{ +} + #include "asm/arch/user.h" #define current_text_addr() \ diff --git a/include/asm-um/ptrace-generic.h b/include/asm-um/ptrace-generic.h index 8c57e384cb8a..503484305e67 100644 --- a/include/asm-um/ptrace-generic.h +++ b/include/asm-um/ptrace-generic.h @@ -60,17 +60,9 @@ extern void show_regs(struct pt_regs *regs); extern void send_sigtrap(struct task_struct *tsk, union uml_pt_regs *regs, int error_code); -#endif +extern int arch_copy_tls(struct task_struct *new); +extern void clear_flushed_tls(struct task_struct *task); #endif -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. 
- * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ +#endif diff --git a/include/asm-um/ptrace-i386.h b/include/asm-um/ptrace-i386.h index fe882b9d917e..30656c962d74 100644 --- a/include/asm-um/ptrace-i386.h +++ b/include/asm-um/ptrace-i386.h @@ -8,8 +8,11 @@ #define HOST_AUDIT_ARCH AUDIT_ARCH_I386 +#include "linux/compiler.h" #include "sysdep/ptrace.h" #include "asm/ptrace-generic.h" +#include "asm/host_ldt.h" +#include "choose-mode.h" #define PT_REGS_EAX(r) UPT_EAX(&(r)->regs) #define PT_REGS_EBX(r) UPT_EBX(&(r)->regs) @@ -38,15 +41,31 @@ #define user_mode(r) UPT_IS_USER(&(r)->regs) -#endif +extern int ptrace_get_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc); -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ +extern int ptrace_set_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc); + +extern int do_set_thread_area_skas(struct user_desc *info); +extern int do_get_thread_area_skas(struct user_desc *info); + +extern int do_set_thread_area_tt(struct user_desc *info); +extern int do_get_thread_area_tt(struct user_desc *info); + +extern int arch_switch_tls_skas(struct task_struct *from, struct task_struct *to); +extern int arch_switch_tls_tt(struct task_struct *from, struct task_struct *to); + +static inline int do_get_thread_area(struct user_desc *info) +{ + return CHOOSE_MODE_PROC(do_get_thread_area_tt, do_get_thread_area_skas, info); +} + +static inline int do_set_thread_area(struct user_desc *info) +{ + return CHOOSE_MODE_PROC(do_set_thread_area_tt, do_set_thread_area_skas, info); +} + +struct task_struct; + +#endif diff --git a/include/asm-um/ptrace-x86_64.h b/include/asm-um/ptrace-x86_64.h index be51219a8ffe..c894e68b1f96 100644 --- a/include/asm-um/ptrace-x86_64.h +++ b/include/asm-um/ptrace-x86_64.h @@ -8,6 +8,8 @@ #define __UM_PTRACE_X86_64_H #include "linux/compiler.h" +#include "asm/errno.h" +#include "asm/host_ldt.h" #define signal_fault signal_fault_x86_64 #define __FRAME_OFFSETS /* Needed to get the R* macros */ @@ -63,15 +65,26 @@ void signal_fault(struct pt_regs_subarch *regs, void *frame, char *where); #define profile_pc(regs) PT_REGS_IP(regs) -#endif +static inline int ptrace_get_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc) +{ + return -ENOSYS; +} -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. 
- * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ +static inline int ptrace_set_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc) +{ + return -ENOSYS; +} + +static inline void arch_switch_to_tt(struct task_struct *from, + struct task_struct *to) +{ +} + +static inline void arch_switch_to_skas(struct task_struct *from, + struct task_struct *to) +{ +} + +#endif diff --git a/include/asm-um/segment.h b/include/asm-um/segment.h index 55e40301f625..48775452e2cc 100644 --- a/include/asm-um/segment.h +++ b/include/asm-um/segment.h @@ -1,4 +1,6 @@ #ifndef __UM_SEGMENT_H #define __UM_SEGMENT_H +#include "asm/arch/segment.h" + #endif -- cgit v1.2.3 From 3feb88562d149f078319e5a1b2f7acaa10251a5c Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Fri, 31 Mar 2006 02:30:25 -0800 Subject: [PATCH] uml: check for differences in host support If running on a host not supporting TLS (for instance 2.4) we should report that cleanly to the user, instead of printing not comprehensible "error 5" for that. Additionally, i386 and x86_64 support different ranges for user_desc->entry_number, and we must account for that; we couldn't pass ourselves -1 because we need to override previously existing TLS descriptors which glibc has possibly set, so test at startup the range to use. x86 and x86_64 existing ranges are hardcoded. Signed-off-by: Paolo 'Blaisorblade' Giarrusso Acked-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/include/os.h | 1 + arch/um/include/sysdep-i386/tls.h | 4 +++ arch/um/include/user_util.h | 3 +++ arch/um/os-Linux/sys-i386/Makefile | 2 +- arch/um/os-Linux/sys-i386/tls.c | 33 +++++++++++++++++++++++ arch/um/os-Linux/tls.c | 4 +-- arch/um/sys-i386/tls.c | 55 ++++++++++++++++++++++++++++++++++++-- include/asm-um/segment.h | 6 ++++- 8 files changed, 102 insertions(+), 6 deletions(-) create mode 100644 arch/um/os-Linux/sys-i386/tls.c (limited to 'include') diff --git a/arch/um/include/os.h b/arch/um/include/os.h index 1b780b5dacbe..f88856c28a66 100644 --- a/arch/um/include/os.h +++ b/arch/um/include/os.h @@ -173,6 +173,7 @@ extern int os_fchange_dir(int fd); extern void os_early_checks(void); extern int can_do_skas(void); extern void os_check_bugs(void); +extern void check_host_supports_tls(int *supports_tls, int *tls_min); /* Make sure they are clear when running in TT mode. 
Required by * SEGV_MAYBE_FIXABLE */ diff --git a/arch/um/include/sysdep-i386/tls.h b/arch/um/include/sysdep-i386/tls.h index 938f953b26cc..918fd3c5ff9c 100644 --- a/arch/um/include/sysdep-i386/tls.h +++ b/arch/um/include/sysdep-i386/tls.h @@ -25,4 +25,8 @@ typedef struct um_dup_user_desc { typedef struct user_desc user_desc_t; # endif /* __KERNEL__ */ + +#define GDT_ENTRY_TLS_MIN_I386 6 +#define GDT_ENTRY_TLS_MIN_X86_64 12 + #endif /* _SYSDEP_TLS_H */ diff --git a/arch/um/include/user_util.h b/arch/um/include/user_util.h index 615f2f0a32d0..fe0c29b5144d 100644 --- a/arch/um/include/user_util.h +++ b/arch/um/include/user_util.h @@ -8,6 +8,9 @@ #include "sysdep/ptrace.h" +/* Copied from kernel.h */ +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + #define CATCH_EINTR(expr) while ((errno = 0, ((expr) < 0)) && (errno == EINTR)) extern int mode_tt; diff --git a/arch/um/os-Linux/sys-i386/Makefile b/arch/um/os-Linux/sys-i386/Makefile index 340ef26f5944..b3213613c41c 100644 --- a/arch/um/os-Linux/sys-i386/Makefile +++ b/arch/um/os-Linux/sys-i386/Makefile @@ -3,7 +3,7 @@ # Licensed under the GPL # -obj-$(CONFIG_MODE_SKAS) = registers.o +obj-$(CONFIG_MODE_SKAS) = registers.o tls.o USER_OBJS := $(obj-y) diff --git a/arch/um/os-Linux/sys-i386/tls.c b/arch/um/os-Linux/sys-i386/tls.c new file mode 100644 index 000000000000..ba21f0e04a2f --- /dev/null +++ b/arch/um/os-Linux/sys-i386/tls.c @@ -0,0 +1,33 @@ +#include +#include "sysdep/tls.h" +#include "user_util.h" + +static _syscall1(int, get_thread_area, user_desc_t *, u_info); + +/* Checks whether host supports TLS, and sets *tls_min according to the value + * valid on the host. + * i386 host have it == 6; x86_64 host have it == 12, for i386 emulation. */ +void check_host_supports_tls(int *supports_tls, int *tls_min) { + /* Values for x86 and x86_64.*/ + int val[] = {GDT_ENTRY_TLS_MIN_I386, GDT_ENTRY_TLS_MIN_X86_64}; + int i; + + for (i = 0; i < ARRAY_SIZE(val); i++) { + user_desc_t info; + info.entry_number = val[i]; + + if (get_thread_area(&info) == 0) { + *tls_min = val[i]; + *supports_tls = 1; + return; + } else { + if (errno == EINVAL) + continue; + else if (errno == ENOSYS) + *supports_tls = 0; + return; + } + } + + *supports_tls = 0; +} diff --git a/arch/um/os-Linux/tls.c b/arch/um/os-Linux/tls.c index 642db553ab60..9cb09a45546b 100644 --- a/arch/um/os-Linux/tls.c +++ b/arch/um/os-Linux/tls.c @@ -48,8 +48,8 @@ int os_get_thread_area(user_desc_t *info, int pid) #ifdef UML_CONFIG_MODE_TT #include "linux/unistd.h" -_syscall1(int, get_thread_area, user_desc_t *, u_info); -_syscall1(int, set_thread_area, user_desc_t *, u_info); +static _syscall1(int, get_thread_area, user_desc_t *, u_info); +static _syscall1(int, set_thread_area, user_desc_t *, u_info); int do_set_thread_area_tt(user_desc_t *info) { diff --git a/arch/um/sys-i386/tls.c b/arch/um/sys-i386/tls.c index 2251654c6b45..a3188e861cc7 100644 --- a/arch/um/sys-i386/tls.c +++ b/arch/um/sys-i386/tls.c @@ -24,6 +24,10 @@ #include "skas.h" #endif +/* If needed we can detect when it's uninitialized. */ +static int host_supports_tls = -1; +int host_gdt_entry_tls_min = -1; + #ifdef CONFIG_MODE_SKAS int do_set_thread_area_skas(struct user_desc *info) { @@ -157,11 +161,20 @@ void clear_flushed_tls(struct task_struct *task) } } -/* This in SKAS0 does not need to be used, since we have different host - * processes. Nor will this need to be used when we'll add support to the host +/* In SKAS0 mode, currently, multiple guest threads sharing the same ->mm have a + * common host process. 
So this is needed in SKAS0 too. + * + * However, if each thread had a different host process (and this was discussed + * for SMP support) this won't be needed. + * + * And this will not need be used when (and if) we'll add support to the host * SKAS patch. */ + int arch_switch_tls_skas(struct task_struct *from, struct task_struct *to) { + if (!host_supports_tls) + return 0; + /* We have no need whatsoever to switch TLS for kernel threads; beyond * that, that would also result in us calling os_set_thread_area with * userspace_pid[cpu] == 0, which gives an error. */ @@ -173,6 +186,9 @@ int arch_switch_tls_skas(struct task_struct *from, struct task_struct *to) int arch_switch_tls_tt(struct task_struct *from, struct task_struct *to) { + if (!host_supports_tls) + return 0; + if (needs_TLS_update(to)) return load_TLS(0, to); @@ -256,6 +272,9 @@ asmlinkage int sys_set_thread_area(struct user_desc __user *user_desc) struct user_desc info; int idx, ret; + if (!host_supports_tls) + return -ENOSYS; + if (copy_from_user(&info, user_desc, sizeof(info))) return -EFAULT; @@ -287,6 +306,9 @@ int ptrace_set_thread_area(struct task_struct *child, int idx, { struct user_desc info; + if (!host_supports_tls) + return -EIO; + if (copy_from_user(&info, user_desc, sizeof(info))) return -EFAULT; @@ -298,6 +320,9 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *user_desc) struct user_desc info; int idx, ret; + if (!host_supports_tls) + return -ENOSYS; + if (get_user(idx, &user_desc->entry_number)) return -EFAULT; @@ -321,6 +346,9 @@ int ptrace_get_thread_area(struct task_struct *child, int idx, struct user_desc info; int ret; + if (!host_supports_tls) + return -EIO; + ret = get_tls_entry(child, &info, idx); if (ret < 0) goto out; @@ -331,3 +359,26 @@ out: return ret; } + +/* XXX: This part is probably common to i386 and x86-64. Don't create a common + * file for now, do that when implementing x86-64 support.*/ +static int __init __setup_host_supports_tls(void) { + check_host_supports_tls(&host_supports_tls, &host_gdt_entry_tls_min); + if (host_supports_tls) { + printk(KERN_INFO "Host TLS support detected\n"); + printk(KERN_INFO "Detected host type: "); + switch (host_gdt_entry_tls_min) { + case GDT_ENTRY_TLS_MIN_I386: + printk("i386\n"); + break; + case GDT_ENTRY_TLS_MIN_X86_64: + printk("x86_64\n"); + break; + } + } else + printk(KERN_ERR " Host TLS support NOT detected! " + "TLS support inside UML will not work\n"); + return 1; +} + +__initcall(__setup_host_supports_tls); diff --git a/include/asm-um/segment.h b/include/asm-um/segment.h index 48775452e2cc..45183fcd10b6 100644 --- a/include/asm-um/segment.h +++ b/include/asm-um/segment.h @@ -1,6 +1,10 @@ #ifndef __UM_SEGMENT_H #define __UM_SEGMENT_H -#include "asm/arch/segment.h" +extern int host_gdt_entry_tls_min; + +#define GDT_ENTRY_TLS_ENTRIES 3 +#define GDT_ENTRY_TLS_MIN host_gdt_entry_tls_min +#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1) #endif -- cgit v1.2.3 From 97db7fbfc7712bc403330c81c04ddfa82b335bce Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Fri, 31 Mar 2006 02:30:26 -0800 Subject: [PATCH] for_each_possible_cpu: s390 for_each_cpu() actually iterates across all possible CPUs. We've had mistakes in the past where people were using for_each_cpu() where they should have been iterating across only online or present CPUs. This is inefficient and possibly buggy. We're renaming for_each_cpu() to for_each_possible_cpu() to avoid this in the future. This patch replaces for_each_cpu with for_each_possible_cpu. 
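For illustration only, and not part of this patch: the rename makes the intended iteration set explicit. example_count below is a made-up per-CPU variable used purely as a sketch.

#include <linux/cpumask.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, example_count);

/* Per-CPU storage must cover every CPU that may ever come up. */
static void example_reset_counters(void)
{
        int cpu;

        for_each_possible_cpu(cpu)
                per_cpu(example_count, cpu) = 0;
}

/* Work and reporting usually want only the CPUs running right now. */
static unsigned long example_sum_online(void)
{
        unsigned long sum = 0;
        int cpu;

        for_each_online_cpu(cpu)
                sum += per_cpu(example_count, cpu);

        return sum;
}
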
Signed-off-by: KAMEZAWA Hiroyuki Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/kernel/smp.c | 6 +++--- include/asm-s390/percpu.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 2b8841f85534..343120c9223d 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -801,7 +801,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) */ print_cpu_info(&S390_lowcore.cpu_data); - for_each_cpu(i) { + for_each_possible_cpu(i) { lowcore_ptr[i] = (struct _lowcore *) __get_free_pages(GFP_KERNEL|GFP_DMA, sizeof(void*) == 8 ? 1 : 0); @@ -831,7 +831,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) #endif set_prefix((u32)(unsigned long) lowcore_ptr[smp_processor_id()]); - for_each_cpu(cpu) + for_each_possible_cpu(cpu) if (cpu != smp_processor_id()) smp_create_idle(cpu); } @@ -868,7 +868,7 @@ static int __init topology_init(void) int cpu; int ret; - for_each_cpu(cpu) { + for_each_possible_cpu(cpu) { ret = register_cpu(&per_cpu(cpu_devices, cpu), cpu, NULL); if (ret) printk(KERN_WARNING "topology_init: register_cpu %d " diff --git a/include/asm-s390/percpu.h b/include/asm-s390/percpu.h index e10ed87094f0..436d216601e5 100644 --- a/include/asm-s390/percpu.h +++ b/include/asm-s390/percpu.h @@ -46,7 +46,7 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #define percpu_modcopy(pcpudst, src, size) \ do { \ unsigned int __i; \ - for_each_cpu(__i) \ + for_each_possible_cpu(__i) \ memcpy((pcpudst)+__per_cpu_offset[__i], \ (src), (size)); \ } while (0) -- cgit v1.2.3 From 3691c5199e8a4be1c7a91b5ab925db5feb866e19 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 31 Mar 2006 02:30:30 -0800 Subject: [PATCH] kill __init_timer_base in favor of boot_tvec_bases Commit a4a6198b80cf82eb8160603c98da218d1bd5e104: [PATCH] tvec_bases too large for per-cpu data introduced "struct tvec_t_base_s boot_tvec_bases" which is visible at compile time. This means we can kill __init_timer_base and move timer_base_s's content into tvec_t_base_s. Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/timer.h | 8 ++--- kernel/timer.c | 84 +++++++++++++++++++++------------------------------ 2 files changed, 39 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/include/linux/timer.h b/include/linux/timer.h index b5caabca553c..0a485beba9f5 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -6,7 +6,7 @@ #include #include -struct timer_base_s; +struct tvec_t_base_s; struct timer_list { struct list_head entry; @@ -15,16 +15,16 @@ struct timer_list { void (*function)(unsigned long); unsigned long data; - struct timer_base_s *base; + struct tvec_t_base_s *base; }; -extern struct timer_base_s __init_timer_base; +extern struct tvec_t_base_s boot_tvec_bases; #define TIMER_INITIALIZER(_function, _expires, _data) { \ .function = (_function), \ .expires = (_expires), \ .data = (_data), \ - .base = &__init_timer_base, \ + .base = &boot_tvec_bases, \ } #define DEFINE_TIMER(_name, _function, _expires, _data) \ diff --git a/kernel/timer.c b/kernel/timer.c index ab189dd187cb..b04dc03b5934 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -54,7 +54,6 @@ EXPORT_SYMBOL(jiffies_64); /* * per-CPU timer vector definitions: */ - #define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6) #define TVR_BITS (CONFIG_BASE_SMALL ? 
6 : 8) #define TVN_SIZE (1 << TVN_BITS) @@ -62,11 +61,6 @@ EXPORT_SYMBOL(jiffies_64); #define TVN_MASK (TVN_SIZE - 1) #define TVR_MASK (TVR_SIZE - 1) -struct timer_base_s { - spinlock_t lock; - struct timer_list *running_timer; -}; - typedef struct tvec_s { struct list_head vec[TVN_SIZE]; } tvec_t; @@ -76,7 +70,8 @@ typedef struct tvec_root_s { } tvec_root_t; struct tvec_t_base_s { - struct timer_base_s t_base; + spinlock_t lock; + struct timer_list *running_timer; unsigned long timer_jiffies; tvec_root_t tv1; tvec_t tv2; @@ -87,13 +82,14 @@ struct tvec_t_base_s { typedef struct tvec_t_base_s tvec_base_t; static DEFINE_PER_CPU(tvec_base_t *, tvec_bases); -static tvec_base_t boot_tvec_bases; +tvec_base_t boot_tvec_bases; +EXPORT_SYMBOL(boot_tvec_bases); static inline void set_running_timer(tvec_base_t *base, struct timer_list *timer) { #ifdef CONFIG_SMP - base->t_base.running_timer = timer; + base->running_timer = timer; #endif } @@ -139,15 +135,6 @@ static void internal_add_timer(tvec_base_t *base, struct timer_list *timer) list_add_tail(&timer->entry, vec); } -typedef struct timer_base_s timer_base_t; -/* - * Used by TIMER_INITIALIZER, we can't use per_cpu(tvec_bases) - * at compile time, and we need timer->base to lock the timer. - */ -timer_base_t __init_timer_base - ____cacheline_aligned_in_smp = { .lock = SPIN_LOCK_UNLOCKED }; -EXPORT_SYMBOL(__init_timer_base); - /*** * init_timer - initialize a timer. * @timer: the timer to be initialized @@ -158,7 +145,7 @@ EXPORT_SYMBOL(__init_timer_base); void fastcall init_timer(struct timer_list *timer) { timer->entry.next = NULL; - timer->base = &per_cpu(tvec_bases, raw_smp_processor_id())->t_base; + timer->base = per_cpu(tvec_bases, raw_smp_processor_id()); } EXPORT_SYMBOL(init_timer); @@ -174,7 +161,7 @@ static inline void detach_timer(struct timer_list *timer, } /* - * We are using hashed locking: holding per_cpu(tvec_bases).t_base.lock + * We are using hashed locking: holding per_cpu(tvec_bases).lock * means that all timers which are tied to this base via timer->base are * locked, and the base itself is locked too. * @@ -185,10 +172,10 @@ static inline void detach_timer(struct timer_list *timer, * possible to set timer->base = NULL and drop the lock: the timer remains * locked. */ -static timer_base_t *lock_timer_base(struct timer_list *timer, +static tvec_base_t *lock_timer_base(struct timer_list *timer, unsigned long *flags) { - timer_base_t *base; + tvec_base_t *base; for (;;) { base = timer->base; @@ -205,8 +192,7 @@ static timer_base_t *lock_timer_base(struct timer_list *timer, int __mod_timer(struct timer_list *timer, unsigned long expires) { - timer_base_t *base; - tvec_base_t *new_base; + tvec_base_t *base, *new_base; unsigned long flags; int ret = 0; @@ -221,7 +207,7 @@ int __mod_timer(struct timer_list *timer, unsigned long expires) new_base = __get_cpu_var(tvec_bases); - if (base != &new_base->t_base) { + if (base != new_base) { /* * We are trying to schedule the timer on the local CPU. 
* However we can't change timer's base while it is running, @@ -231,19 +217,19 @@ int __mod_timer(struct timer_list *timer, unsigned long expires) */ if (unlikely(base->running_timer == timer)) { /* The timer remains on a former base */ - new_base = container_of(base, tvec_base_t, t_base); + new_base = base; } else { /* See the comment in lock_timer_base() */ timer->base = NULL; spin_unlock(&base->lock); - spin_lock(&new_base->t_base.lock); - timer->base = &new_base->t_base; + spin_lock(&new_base->lock); + timer->base = new_base; } } timer->expires = expires; internal_add_timer(new_base, timer); - spin_unlock_irqrestore(&new_base->t_base.lock, flags); + spin_unlock_irqrestore(&new_base->lock, flags); return ret; } @@ -263,10 +249,10 @@ void add_timer_on(struct timer_list *timer, int cpu) unsigned long flags; BUG_ON(timer_pending(timer) || !timer->function); - spin_lock_irqsave(&base->t_base.lock, flags); - timer->base = &base->t_base; + spin_lock_irqsave(&base->lock, flags); + timer->base = base; internal_add_timer(base, timer); - spin_unlock_irqrestore(&base->t_base.lock, flags); + spin_unlock_irqrestore(&base->lock, flags); } @@ -319,7 +305,7 @@ EXPORT_SYMBOL(mod_timer); */ int del_timer(struct timer_list *timer) { - timer_base_t *base; + tvec_base_t *base; unsigned long flags; int ret = 0; @@ -346,7 +332,7 @@ EXPORT_SYMBOL(del_timer); */ int try_to_del_timer_sync(struct timer_list *timer) { - timer_base_t *base; + tvec_base_t *base; unsigned long flags; int ret = -1; @@ -410,7 +396,7 @@ static int cascade(tvec_base_t *base, tvec_t *tv, int index) struct timer_list *tmp; tmp = list_entry(curr, struct timer_list, entry); - BUG_ON(tmp->base != &base->t_base); + BUG_ON(tmp->base != base); curr = curr->next; internal_add_timer(base, tmp); } @@ -432,7 +418,7 @@ static inline void __run_timers(tvec_base_t *base) { struct timer_list *timer; - spin_lock_irq(&base->t_base.lock); + spin_lock_irq(&base->lock); while (time_after_eq(jiffies, base->timer_jiffies)) { struct list_head work_list = LIST_HEAD_INIT(work_list); struct list_head *head = &work_list; @@ -458,7 +444,7 @@ static inline void __run_timers(tvec_base_t *base) set_running_timer(base, timer); detach_timer(timer, 1); - spin_unlock_irq(&base->t_base.lock); + spin_unlock_irq(&base->lock); { int preempt_count = preempt_count(); fn(data); @@ -471,11 +457,11 @@ static inline void __run_timers(tvec_base_t *base) BUG(); } } - spin_lock_irq(&base->t_base.lock); + spin_lock_irq(&base->lock); } } set_running_timer(base, NULL); - spin_unlock_irq(&base->t_base.lock); + spin_unlock_irq(&base->lock); } #ifdef CONFIG_NO_IDLE_HZ @@ -506,7 +492,7 @@ unsigned long next_timer_interrupt(void) hr_expires += jiffies; base = __get_cpu_var(tvec_bases); - spin_lock(&base->t_base.lock); + spin_lock(&base->lock); expires = base->timer_jiffies + (LONG_MAX >> 1); list = NULL; @@ -554,7 +540,7 @@ found: expires = nte->expires; } } - spin_unlock(&base->t_base.lock); + spin_unlock(&base->lock); if (time_before(hr_expires, expires)) return hr_expires; @@ -1262,7 +1248,7 @@ static int __devinit init_timers_cpu(int cpu) } per_cpu(tvec_bases, cpu) = base; } - spin_lock_init(&base->t_base.lock); + spin_lock_init(&base->lock); for (j = 0; j < TVN_SIZE; j++) { INIT_LIST_HEAD(base->tv5.vec + j); INIT_LIST_HEAD(base->tv4.vec + j); @@ -1284,7 +1270,7 @@ static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head) while (!list_empty(head)) { timer = list_entry(head->next, struct timer_list, entry); detach_timer(timer, 0); - timer->base = &new_base->t_base; + 
timer->base = new_base; internal_add_timer(new_base, timer); } } @@ -1300,11 +1286,11 @@ static void __devinit migrate_timers(int cpu) new_base = get_cpu_var(tvec_bases); local_irq_disable(); - spin_lock(&new_base->t_base.lock); - spin_lock(&old_base->t_base.lock); + spin_lock(&new_base->lock); + spin_lock(&old_base->lock); + + BUG_ON(old_base->running_timer); - if (old_base->t_base.running_timer) - BUG(); for (i = 0; i < TVR_SIZE; i++) migrate_timer_list(new_base, old_base->tv1.vec + i); for (i = 0; i < TVN_SIZE; i++) { @@ -1314,8 +1300,8 @@ static void __devinit migrate_timers(int cpu) migrate_timer_list(new_base, old_base->tv5.vec + i); } - spin_unlock(&old_base->t_base.lock); - spin_unlock(&new_base->t_base.lock); + spin_unlock(&old_base->lock); + spin_unlock(&new_base->lock); local_irq_enable(); put_cpu_var(tvec_bases); } -- cgit v1.2.3 From 68eef3b4791572ecb70249c7fb145bb3742dd899 Mon Sep 17 00:00:00 2001 From: Joe Korty Date: Fri, 31 Mar 2006 02:30:32 -0800 Subject: [PATCH] Simplify proc/devices and fix early termination regression Make baby-simple the code for /proc/devices. Based on the proven design for /proc/interrupts. This also fixes the early-termination regression 2.6.16 introduced, as demonstrated by: # dd if=/proc/devices bs=1 Character devices: 1 mem 27+0 records in 27+0 records out This should also work (but is untested) when /proc/devices >4096 bytes, which I believe is what the original 2.6.16 rewrite fixed. [akpm@osdl.org: cleanups, simplifications] Signed-off-by: Joe Korty Cc: Neil Horman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- block/genhd.c | 103 ++++----------------------------- fs/char_dev.c | 87 ++++------------------------ fs/proc/proc_misc.c | 163 ++++++++++++---------------------------------------- include/linux/fs.h | 15 ++--- 4 files changed, 62 insertions(+), 306 deletions(-) (limited to 'include') diff --git a/block/genhd.c b/block/genhd.c index db4c60c802d6..5a8d3bf02f17 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -17,8 +17,6 @@ #include #include -#define MAX_PROBE_HASH 255 /* random */ - static struct subsystem block_subsys; static DEFINE_MUTEX(block_subsys_lock); @@ -31,108 +29,29 @@ static struct blk_major_name { struct blk_major_name *next; int major; char name[16]; -} *major_names[MAX_PROBE_HASH]; +} *major_names[BLKDEV_MAJOR_HASH_SIZE]; /* index in the above - for now: assume no multimajor ranges */ static inline int major_to_index(int major) { - return major % MAX_PROBE_HASH; -} - -struct blkdev_info { - int index; - struct blk_major_name *bd; -}; - -/* - * iterate over a list of blkdev_info structures. allows - * the major_names array to be iterated over from outside this file - * must be called with the block_subsys_lock held - */ -void *get_next_blkdev(void *dev) -{ - struct blkdev_info *info; - - if (dev == NULL) { - info = kmalloc(sizeof(*info), GFP_KERNEL); - if (!info) - goto out; - info->index=0; - info->bd = major_names[info->index]; - if (info->bd) - goto out; - } else { - info = dev; - } - - while (info->index < ARRAY_SIZE(major_names)) { - if (info->bd) - info->bd = info->bd->next; - if (info->bd) - goto out; - /* - * No devices on this chain, move to the next - */ - info->index++; - info->bd = (info->index < ARRAY_SIZE(major_names)) ? 
- major_names[info->index] : NULL; - if (info->bd) - goto out; - } - -out: - return info; -} - -void *acquire_blkdev_list(void) -{ - mutex_lock(&block_subsys_lock); - return get_next_blkdev(NULL); -} - -void release_blkdev_list(void *dev) -{ - mutex_unlock(&block_subsys_lock); - kfree(dev); + return major % BLKDEV_MAJOR_HASH_SIZE; } +#ifdef CONFIG_PROC_FS -/* - * Count the number of records in the blkdev_list. - * must be called with the block_subsys_lock held - */ -int count_blkdev_list(void) +void blkdev_show(struct seq_file *f, off_t offset) { - struct blk_major_name *n; - int i, count; + struct blk_major_name *dp; - count = 0; - - for (i = 0; i < ARRAY_SIZE(major_names); i++) { - for (n = major_names[i]; n; n = n->next) - count++; + if (offset < BLKDEV_MAJOR_HASH_SIZE) { + mutex_lock(&block_subsys_lock); + for (dp = major_names[offset]; dp; dp = dp->next) + seq_printf(f, "%3d %s\n", dp->major, dp->name); + mutex_unlock(&block_subsys_lock); } - - return count; -} - -/* - * extract the major and name values from a blkdev_info struct - * passed in as a void to *dev. Must be called with - * block_subsys_lock held - */ -int get_blkdev_info(void *dev, int *major, char **name) -{ - struct blkdev_info *info = dev; - - if (info->bd == NULL) - return 1; - - *major = info->bd->major; - *name = info->bd->name; - return 0; } +#endif /* CONFIG_PROC_FS */ int register_blkdev(unsigned int major, const char *name) { diff --git a/fs/char_dev.c b/fs/char_dev.c index 4e1b849f912f..f3418f7a6e9d 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -27,8 +28,6 @@ static struct kobj_map *cdev_map; -#define MAX_PROBE_HASH 255 /* random */ - static DEFINE_MUTEX(chrdevs_lock); static struct char_device_struct { @@ -39,93 +38,29 @@ static struct char_device_struct { char name[64]; struct file_operations *fops; struct cdev *cdev; /* will die */ -} *chrdevs[MAX_PROBE_HASH]; +} *chrdevs[CHRDEV_MAJOR_HASH_SIZE]; /* index in the above */ static inline int major_to_index(int major) { - return major % MAX_PROBE_HASH; -} - -struct chrdev_info { - int index; - struct char_device_struct *cd; -}; - -void *get_next_chrdev(void *dev) -{ - struct chrdev_info *info; - - if (dev == NULL) { - info = kmalloc(sizeof(*info), GFP_KERNEL); - if (!info) - goto out; - info->index=0; - info->cd = chrdevs[info->index]; - if (info->cd) - goto out; - } else { - info = dev; - } - - while (info->index < ARRAY_SIZE(chrdevs)) { - if (info->cd) - info->cd = info->cd->next; - if (info->cd) - goto out; - /* - * No devices on this chain, move to the next - */ - info->index++; - info->cd = (info->index < ARRAY_SIZE(chrdevs)) ? 
- chrdevs[info->index] : NULL; - if (info->cd) - goto out; - } - -out: - return info; -} - -void *acquire_chrdev_list(void) -{ - mutex_lock(&chrdevs_lock); - return get_next_chrdev(NULL); -} - -void release_chrdev_list(void *dev) -{ - mutex_unlock(&chrdevs_lock); - kfree(dev); + return major % CHRDEV_MAJOR_HASH_SIZE; } +#ifdef CONFIG_PROC_FS -int count_chrdev_list(void) +void chrdev_show(struct seq_file *f, off_t offset) { struct char_device_struct *cd; - int i, count; - - count = 0; - for (i = 0; i < ARRAY_SIZE(chrdevs) ; i++) { - for (cd = chrdevs[i]; cd; cd = cd->next) - count++; + if (offset < CHRDEV_MAJOR_HASH_SIZE) { + mutex_lock(&chrdevs_lock); + for (cd = chrdevs[offset]; cd; cd = cd->next) + seq_printf(f, "%3d %s\n", cd->major, cd->name); + mutex_unlock(&chrdevs_lock); } - - return count; } -int get_chrdev_info(void *dev, int *major, char **name) -{ - struct chrdev_info *info = dev; - - if (info->cd == NULL) - return 1; - - *major = info->cd->major; - *name = info->cd->name; - return 0; -} +#endif /* CONFIG_PROC_FS */ /* * Register a single major with a specified minor range. diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index ef5a3323f4b5..5c10ea157425 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -249,144 +249,60 @@ static int cpuinfo_open(struct inode *inode, struct file *file) return seq_open(file, &cpuinfo_op); } -enum devinfo_states { - CHR_HDR, - CHR_LIST, - BLK_HDR, - BLK_LIST, - DEVINFO_DONE -}; - -struct devinfo_state { - void *chrdev; - void *blkdev; - unsigned int num_records; - unsigned int cur_record; - enum devinfo_states state; +static struct file_operations proc_cpuinfo_operations = { + .open = cpuinfo_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, }; -static void *devinfo_start(struct seq_file *f, loff_t *pos) +static int devinfo_show(struct seq_file *f, void *v) { - struct devinfo_state *info = f->private; + int i = *(loff_t *) v; - if (*pos) { - if ((info) && (*pos <= info->num_records)) - return info; - return NULL; + if (i < CHRDEV_MAJOR_HASH_SIZE) { + if (i == 0) + seq_printf(f, "Character devices:\n"); + chrdev_show(f, i); + } else { + i -= CHRDEV_MAJOR_HASH_SIZE; + if (i == 0) + seq_printf(f, "\nBlock devices:\n"); + blkdev_show(f, i); } - info = kmalloc(sizeof(*info), GFP_KERNEL); - f->private = info; - info->chrdev = acquire_chrdev_list(); - info->blkdev = acquire_blkdev_list(); - info->state = CHR_HDR; - info->num_records = count_chrdev_list(); - info->num_records += count_blkdev_list(); - info->num_records += 2; /* Character and Block headers */ - *pos = 1; - info->cur_record = *pos; - return info; + return 0; } -static void *devinfo_next(struct seq_file *f, void *v, loff_t *pos) +static void *devinfo_start(struct seq_file *f, loff_t *pos) { - int idummy; - char *ndummy; - struct devinfo_state *info = f->private; - - switch (info->state) { - case CHR_HDR: - info->state = CHR_LIST; - (*pos)++; - /*fallthrough*/ - case CHR_LIST: - if (get_chrdev_info(info->chrdev,&idummy,&ndummy)) { - /* - * The character dev list is complete - */ - info->state = BLK_HDR; - } else { - info->chrdev = get_next_chrdev(info->chrdev); - } - (*pos)++; - break; - case BLK_HDR: - info->state = BLK_LIST; - (*pos)++; - /*fallthrough*/ - case BLK_LIST: - if (get_blkdev_info(info->blkdev,&idummy,&ndummy)) { - /* - * The block dev list is complete - */ - info->state = DEVINFO_DONE; - } else { - info->blkdev = get_next_blkdev(info->blkdev); - } - (*pos)++; - break; - case DEVINFO_DONE: - (*pos)++; - info->cur_record = *pos; - info = 
NULL; - break; - default: - break; - } - if (info) - info->cur_record = *pos; - return info; + if (*pos < (BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_HASH_SIZE)) + return pos; + return NULL; } -static void devinfo_stop(struct seq_file *f, void *v) +static void *devinfo_next(struct seq_file *f, void *v, loff_t *pos) { - struct devinfo_state *info = f->private; - - if (info) { - release_chrdev_list(info->chrdev); - release_blkdev_list(info->blkdev); - f->private = NULL; - kfree(info); - } + (*pos)++; + if (*pos >= (BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_HASH_SIZE)) + return NULL; + return pos; } -static int devinfo_show(struct seq_file *f, void *arg) -{ - int major; - char *name; - struct devinfo_state *info = f->private; - - switch(info->state) { - case CHR_HDR: - seq_printf(f,"Character devices:\n"); - /* fallthrough */ - case CHR_LIST: - if (!get_chrdev_info(info->chrdev,&major,&name)) - seq_printf(f,"%3d %s\n",major,name); - break; - case BLK_HDR: - seq_printf(f,"\nBlock devices:\n"); - /* fallthrough */ - case BLK_LIST: - if (!get_blkdev_info(info->blkdev,&major,&name)) - seq_printf(f,"%3d %s\n",major,name); - break; - default: - break; - } - - return 0; +static void devinfo_stop(struct seq_file *f, void *v) +{ + /* Nothing to do */ } -static struct seq_operations devinfo_op = { - .start = devinfo_start, - .next = devinfo_next, - .stop = devinfo_stop, - .show = devinfo_show, +static struct seq_operations devinfo_ops = { + .start = devinfo_start, + .next = devinfo_next, + .stop = devinfo_stop, + .show = devinfo_show }; -static int devinfo_open(struct inode *inode, struct file *file) +static int devinfo_open(struct inode *inode, struct file *filp) { - return seq_open(file, &devinfo_op); + return seq_open(filp, &devinfo_ops); } static struct file_operations proc_devinfo_operations = { @@ -396,13 +312,6 @@ static struct file_operations proc_devinfo_operations = { .release = seq_release, }; -static struct file_operations proc_cpuinfo_operations = { - .open = cpuinfo_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - extern struct seq_operations vmstat_op; static int vmstat_open(struct inode *inode, struct file *file) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 20fa5f6d7269..20a7afd4590c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1413,6 +1413,7 @@ extern void bd_release_from_disk(struct block_device *, struct gendisk *); #endif /* fs/char_dev.c */ +#define CHRDEV_MAJOR_HASH_SIZE 255 extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *); extern int register_chrdev_region(dev_t, unsigned, const char *); extern int register_chrdev(unsigned int, const char *, @@ -1420,25 +1421,17 @@ extern int register_chrdev(unsigned int, const char *, extern int unregister_chrdev(unsigned int, const char *); extern void unregister_chrdev_region(dev_t, unsigned); extern int chrdev_open(struct inode *, struct file *); -extern int get_chrdev_list(char *); -extern void *acquire_chrdev_list(void); -extern int count_chrdev_list(void); -extern void *get_next_chrdev(void *); -extern int get_chrdev_info(void *, int *, char **); -extern void release_chrdev_list(void *); +extern void chrdev_show(struct seq_file *,off_t); /* fs/block_dev.c */ +#define BLKDEV_MAJOR_HASH_SIZE 255 #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ extern const char *__bdevname(dev_t, char *buffer); extern const char *bdevname(struct block_device *bdev, char *buffer); extern struct block_device *lookup_bdev(const char *); extern struct 
block_device *open_bdev_excl(const char *, int, void *); extern void close_bdev_excl(struct block_device *); -extern void *acquire_blkdev_list(void); -extern int count_blkdev_list(void); -extern void *get_next_blkdev(void *); -extern int get_blkdev_info(void *, int *, char **); -extern void release_blkdev_list(void *); +extern void blkdev_show(struct seq_file *,off_t); extern void init_special_inode(struct inode *, umode_t, dev_t); -- cgit v1.2.3 From 453823ba08ba762b3d58934b6dce75edce37169e Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Fri, 31 Mar 2006 02:30:39 -0800 Subject: [PATCH] IPMI: fix startup race condition Matt Domsch noticed a startup race with the IPMI kernel thread, it was possible (though extraordinarly unlikely) that a message could come in before the upper layer was ready to handle it. This patch splits the startup processing of an IPMI interface into two parts, one to get ready and one to actually start the processes to receive messages from the interface. [akpm@osdl.org: cleanups] Signed-off-by: Corey Minyard Cc: Matt Domsch Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/ipmi/ipmi_msghandler.c | 9 +++--- drivers/char/ipmi/ipmi_si_intf.c | 59 +++++++++++++++++++++++-------------- include/linux/ipmi_smi.h | 16 ++++++++-- 3 files changed, 54 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 40eb005b9d77..a0b6f797d97d 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -2305,8 +2305,7 @@ int ipmi_register_smi(struct ipmi_smi_handlers *handlers, void *send_info, struct ipmi_device_id *device_id, struct device *si_dev, - unsigned char slave_addr, - ipmi_smi_t *new_intf) + unsigned char slave_addr) { int i, j; int rv; @@ -2388,9 +2387,9 @@ int ipmi_register_smi(struct ipmi_smi_handlers *handlers, if (rv) goto out; - /* FIXME - this is an ugly kludge, this sets the intf for the - caller before sending any messages with it. */ - *new_intf = intf; + rv = handlers->start_processing(send_info, intf); + if (rv) + goto out; get_guid(intf); diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 35fbd4d8ed4b..d48d86bd2c2b 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -972,10 +972,37 @@ static irqreturn_t si_bt_irq_handler(int irq, void *data, struct pt_regs *regs) return si_irq_handler(irq, data, regs); } +static int smi_start_processing(void *send_info, + ipmi_smi_t intf) +{ + struct smi_info *new_smi = send_info; + + new_smi->intf = intf; + + /* Set up the timer that drives the interface. 
*/ + setup_timer(&new_smi->si_timer, smi_timeout, (long)new_smi); + new_smi->last_timeout_jiffies = jiffies; + mod_timer(&new_smi->si_timer, jiffies + SI_TIMEOUT_JIFFIES); + + if (new_smi->si_type != SI_BT) { + new_smi->thread = kthread_run(ipmi_thread, new_smi, + "kipmi%d", new_smi->intf_num); + if (IS_ERR(new_smi->thread)) { + printk(KERN_NOTICE "ipmi_si_intf: Could not start" + " kernel thread due to error %ld, only using" + " timers to drive the interface\n", + PTR_ERR(new_smi->thread)); + new_smi->thread = NULL; + } + } + + return 0; +} static struct ipmi_smi_handlers handlers = { .owner = THIS_MODULE, + .start_processing = smi_start_processing, .sender = sender, .request_events = request_events, .set_run_to_completion = set_run_to_completion, @@ -2162,9 +2189,13 @@ static void setup_xaction_handlers(struct smi_info *smi_info) static inline void wait_for_timer_and_thread(struct smi_info *smi_info) { - if (smi_info->thread != NULL && smi_info->thread != ERR_PTR(-ENOMEM)) - kthread_stop(smi_info->thread); - del_timer_sync(&smi_info->si_timer); + if (smi_info->intf) { + /* The timer and thread are only running if the + interface has been started up and registered. */ + if (smi_info->thread != NULL) + kthread_stop(smi_info->thread); + del_timer_sync(&smi_info->si_timer); + } } static struct ipmi_default_vals @@ -2341,21 +2372,6 @@ static int try_smi_init(struct smi_info *new_smi) if (new_smi->irq) new_smi->si_state = SI_CLEARING_FLAGS_THEN_SET_IRQ; - /* The ipmi_register_smi() code does some operations to - determine the channel information, so we must be ready to - handle operations before it is called. This means we have - to stop the timer if we get an error after this point. */ - init_timer(&(new_smi->si_timer)); - new_smi->si_timer.data = (long) new_smi; - new_smi->si_timer.function = smi_timeout; - new_smi->last_timeout_jiffies = jiffies; - new_smi->si_timer.expires = jiffies + SI_TIMEOUT_JIFFIES; - - add_timer(&(new_smi->si_timer)); - if (new_smi->si_type != SI_BT) - new_smi->thread = kthread_run(ipmi_thread, new_smi, - "kipmi%d", new_smi->intf_num); - if (!new_smi->dev) { /* If we don't already have a device from something * else (like PCI), then register a new one. */ @@ -2365,7 +2381,7 @@ static int try_smi_init(struct smi_info *new_smi) printk(KERN_ERR "ipmi_si_intf:" " Unable to allocate platform device\n"); - goto out_err_stop_timer; + goto out_err; } new_smi->dev = &new_smi->pdev->dev; new_smi->dev->driver = &ipmi_driver; @@ -2377,7 +2393,7 @@ static int try_smi_init(struct smi_info *new_smi) " Unable to register system interface device:" " %d\n", rv); - goto out_err_stop_timer; + goto out_err; } new_smi->dev_registered = 1; } @@ -2386,8 +2402,7 @@ static int try_smi_init(struct smi_info *new_smi) new_smi, &new_smi->device_id, new_smi->dev, - new_smi->slave_addr, - &(new_smi->intf)); + new_smi->slave_addr); if (rv) { printk(KERN_ERR "ipmi_si: Unable to register device: error %d\n", diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h index 53571288a9fc..6d9c7e4da472 100644 --- a/include/linux/ipmi_smi.h +++ b/include/linux/ipmi_smi.h @@ -82,6 +82,13 @@ struct ipmi_smi_handlers { struct module *owner; + /* The low-level interface cannot start sending messages to + the upper layer until this function is called. This may + not be NULL, the lower layer must take the interface from + this call. */ + int (*start_processing)(void *send_info, + ipmi_smi_t new_intf); + /* Called to enqueue an SMI message to be sent. This operation is not allowed to fail. 
If an error occurs, it should report back the error in a received message. It may @@ -157,13 +164,16 @@ static inline void ipmi_demangle_device_id(unsigned char *data, } /* Add a low-level interface to the IPMI driver. Note that if the - interface doesn't know its slave address, it should pass in zero. */ + interface doesn't know its slave address, it should pass in zero. + The low-level interface should not deliver any messages to the + upper layer until the start_processing() function in the handlers + is called, and the lower layer must get the interface from that + call. */ int ipmi_register_smi(struct ipmi_smi_handlers *handlers, void *send_info, struct ipmi_device_id *device_id, struct device *dev, - unsigned char slave_addr, - ipmi_smi_t *intf); + unsigned char slave_addr); /* * Remove a low-level interface from the IPMI driver. This will -- cgit v1.2.3 From f79e2abb9bd452d97295f34376dedbec9686b986 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 31 Mar 2006 02:30:42 -0800 Subject: [PATCH] sys_sync_file_range() Remove the recently-added LINUX_FADV_ASYNC_WRITE and LINUX_FADV_WRITE_WAIT fadvise() additions, do it in a new sys_sync_file_range() syscall instead. Reasons: - It's more flexible. Things which would require two or three syscalls with fadvise() can be done in a single syscall. - Using fadvise() in this manner is something not covered by POSIX. The patch wires up the syscall for x86. The sycall is implemented in the new fs/sync.c. The intention is that we can move sys_fsync(), sys_fdatasync() and perhaps sys_sync() into there later. Documentation for the syscall is in fs/sync.c. A test app (sync_file_range.c) is in http://www.zip.com.au/~akpm/linux/patches/stuff/ext3-tools.tar.gz. The available-to-GPL-modules do_sync_file_range() is for knfsd: "A COMMIT can say NFS_DATA_SYNC or NFS_FILE_SYNC. I can skip the ->fsync call for NFS_DATA_SYNC which is hopefully the more common." Note: the `async' writeout mode SYNC_FILE_RANGE_WRITE will turn synchronous if the queue is congested. This is trivial to fix: add a new flag bit, set wbc->nonblocking. But I'm not sure that we want to expose implementation details down to that level. Note: it's notable that we can sync an fd which wasn't opened for writing. Same with fsync() and fdatasync()). Note: the code takes some care to handle attempts to sync file contents outside the 16TB offset on 32-bit machines. It makes such attempts appear to succeed, for best 32-bit/64-bit compatibility. Perhaps it should make such requests fail... 
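As a rough illustration of the intended use, and not part of this patch: glibc shipped no wrapper at the time, so sync_file_range() below is an assumed userspace wrapper with the kernel prototype; a real program would go through syscall() with the number wired up for its architecture.

#define SYNC_FILE_RANGE_WAIT_BEFORE     1
#define SYNC_FILE_RANGE_WRITE           2
#define SYNC_FILE_RANGE_WAIT_AFTER      4

/* Assumed wrapper, matching the kernel prototype above. */
extern int sync_file_range(int fd, long long offset, long long nbytes,
                           unsigned int flags);

/* Start writeout of a dirty range without waiting (no data integrity). */
static int start_writeback(int fd, long long offset, long long len)
{
        return sync_file_range(fd, offset, len, SYNC_FILE_RANGE_WRITE);
}

/* Write-for-data-integrity over the first 16MB of the file. */
static int flush_head(int fd)
{
        return sync_file_range(fd, 0, 16LL << 20,
                               SYNC_FILE_RANGE_WAIT_BEFORE |
                               SYNC_FILE_RANGE_WRITE |
                               SYNC_FILE_RANGE_WAIT_AFTER);
}
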
Cc: Nick Piggin Cc: Michael Kerrisk Cc: Ulrich Drepper Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/syscall_table.S | 1 + fs/Makefile | 2 +- fs/sync.c | 164 +++++++++++++++++++++++++++++++++++++++ include/asm-i386/unistd.h | 3 +- include/linux/fadvise.h | 6 -- include/linux/fs.h | 7 ++ include/linux/syscalls.h | 2 + mm/fadvise.c | 20 ----- 8 files changed, 177 insertions(+), 28 deletions(-) create mode 100644 fs/sync.c (limited to 'include') diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index ce3ef4fa0551..4f58b9c0efe3 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S @@ -313,3 +313,4 @@ ENTRY(sys_call_table) .long sys_set_robust_list .long sys_get_robust_list .long sys_splice + .long sys_sync_file_range diff --git a/fs/Makefile b/fs/Makefile index f3a4f7077175..83bf478e786b 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -10,7 +10,7 @@ obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \ ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \ - ioprio.o pnode.o drop_caches.o splice.o + ioprio.o pnode.o drop_caches.o splice.o sync.o obj-$(CONFIG_INOTIFY) += inotify.o obj-$(CONFIG_EPOLL) += eventpoll.o diff --git a/fs/sync.c b/fs/sync.c new file mode 100644 index 000000000000..8616006d2094 --- /dev/null +++ b/fs/sync.c @@ -0,0 +1,164 @@ +/* + * High-level sync()-related operations + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \ + SYNC_FILE_RANGE_WAIT_AFTER) + +/* + * sys_sync_file_range() permits finely controlled syncing over a segment of + * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is + * zero then sys_sync_file_range() will operate from offset out to EOF. + * + * The flag bits are: + * + * SYNC_FILE_RANGE_WAIT_BEFORE: wait upon writeout of all pages in the range + * before performing the write. + * + * SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the + * range which are not presently under writeback. + * + * SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range + * after performing the write. + * + * Useful combinations of the flag bits are: + * + * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE: ensures that all pages + * in the range which were dirty on entry to sys_sync_file_range() are placed + * under writeout. This is a start-write-for-data-integrity operation. + * + * SYNC_FILE_RANGE_WRITE: start writeout of all dirty pages in the range which + * are not presently under writeout. This is an asynchronous flush-to-disk + * operation. Not suitable for data integrity operations. + * + * SYNC_FILE_RANGE_WAIT_BEFORE (or SYNC_FILE_RANGE_WAIT_AFTER): wait for + * completion of writeout of all pages in the range. This will be used after an + * earlier SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE operation to wait + * for that operation to complete and to return the result. + * + * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER: + * a traditional sync() operation. This is a write-for-data-integrity operation + * which will ensure that all pages in the range which were dirty on entry to + * sys_sync_file_range() are committed to disk. 
+ * + * + * SYNC_FILE_RANGE_WAIT_BEFORE and SYNC_FILE_RANGE_WAIT_AFTER will detect any + * I/O errors or ENOSPC conditions and will return those to the caller, after + * clearing the EIO and ENOSPC flags in the address_space. + * + * It should be noted that none of these operations write out the file's + * metadata. So unless the application is strictly performing overwrites of + * already-instantiated disk blocks, there are no guarantees here that the data + * will be available after a crash. + */ +asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes, + int flags) +{ + int ret; + struct file *file; + loff_t endbyte; /* inclusive */ + int fput_needed; + umode_t i_mode; + + ret = -EINVAL; + if (flags & ~VALID_FLAGS) + goto out; + + endbyte = offset + nbytes; + + if ((s64)offset < 0) + goto out; + if ((s64)endbyte < 0) + goto out; + if (endbyte < offset) + goto out; + + if (sizeof(pgoff_t) == 4) { + if (offset >= (0x100000000ULL << PAGE_CACHE_SHIFT)) { + /* + * The range starts outside a 32 bit machine's + * pagecache addressing capabilities. Let it "succeed" + */ + ret = 0; + goto out; + } + if (endbyte >= (0x100000000ULL << PAGE_CACHE_SHIFT)) { + /* + * Out to EOF + */ + nbytes = 0; + } + } + + if (nbytes == 0) + endbyte = -1; + else + endbyte--; /* inclusive */ + + ret = -EBADF; + file = fget_light(fd, &fput_needed); + if (!file) + goto out; + + i_mode = file->f_dentry->d_inode->i_mode; + ret = -ESPIPE; + if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) && + !S_ISLNK(i_mode)) + goto out_put; + + ret = do_sync_file_range(file, offset, endbyte, flags); +out_put: + fput_light(file, fput_needed); +out: + return ret; +} + +/* + * `endbyte' is inclusive + */ +int do_sync_file_range(struct file *file, loff_t offset, loff_t endbyte, + int flags) +{ + int ret; + struct address_space *mapping; + + mapping = file->f_mapping; + if (!mapping) { + ret = -EINVAL; + goto out; + } + + ret = 0; + if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) { + ret = wait_on_page_writeback_range(mapping, + offset >> PAGE_CACHE_SHIFT, + endbyte >> PAGE_CACHE_SHIFT); + if (ret < 0) + goto out; + } + + if (flags & SYNC_FILE_RANGE_WRITE) { + ret = __filemap_fdatawrite_range(mapping, offset, endbyte, + WB_SYNC_NONE); + if (ret < 0) + goto out; + } + + if (flags & SYNC_FILE_RANGE_WAIT_AFTER) { + ret = wait_on_page_writeback_range(mapping, + offset >> PAGE_CACHE_SHIFT, + endbyte >> PAGE_CACHE_SHIFT); + } +out: + return ret; +} +EXPORT_SYMBOL_GPL(do_sync_file_range); diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h index 789e9bdd0a40..2e7f3e257fdd 100644 --- a/include/asm-i386/unistd.h +++ b/include/asm-i386/unistd.h @@ -319,8 +319,9 @@ #define __NR_set_robust_list 311 #define __NR_get_robust_list 312 #define __NR_sys_splice 313 +#define __NR_sys_sync_file_range 314 -#define NR_syscalls 314 +#define NR_syscalls 315 /* * user-visible error numbers are in the range -1 - -128: see diff --git a/include/linux/fadvise.h b/include/linux/fadvise.h index b2913bba35d8..e8e747139b9a 100644 --- a/include/linux/fadvise.h +++ b/include/linux/fadvise.h @@ -18,10 +18,4 @@ #define POSIX_FADV_NOREUSE 5 /* Data will be accessed once. 
*/ #endif -/* - * Linux-specific fadvise() extensions: - */ -#define LINUX_FADV_ASYNC_WRITE 32 /* Start writeout on range */ -#define LINUX_FADV_WRITE_WAIT 33 /* Wait upon writeout to range */ - #endif /* FADVISE_H_INCLUDED */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 20a7afd4590c..4ed7e602d703 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -757,6 +757,13 @@ extern void send_sigio(struct fown_struct *fown, int fd, int band); extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg); extern int fcntl_getlease(struct file *filp); +/* fs/sync.c */ +#define SYNC_FILE_RANGE_WAIT_BEFORE 1 +#define SYNC_FILE_RANGE_WRITE 2 +#define SYNC_FILE_RANGE_WAIT_AFTER 4 +extern int do_sync_file_range(struct file *file, loff_t offset, loff_t endbyte, + int flags); + /* fs/locks.c */ extern void locks_init_lock(struct file_lock *); extern void locks_copy_lock(struct file_lock *, struct file_lock *); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index e78ffc7d5b56..5717147596b6 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -571,5 +571,7 @@ asmlinkage long compat_sys_openat(unsigned int dfd, const char __user *filename, asmlinkage long sys_unshare(unsigned long unshare_flags); asmlinkage long sys_splice(int fdin, int fdout, size_t len, unsigned int flags); +asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes, + int flags); #endif diff --git a/mm/fadvise.c b/mm/fadvise.c index 907c39257ca0..0a03357a1f8e 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c @@ -35,17 +35,6 @@ * * LINUX_FADV_ASYNC_WRITE: push some or all of the dirty pages at the disk. * - * LINUX_FADV_WRITE_WAIT, LINUX_FADV_ASYNC_WRITE: push all of the currently - * dirty pages at the disk. - * - * LINUX_FADV_WRITE_WAIT, LINUX_FADV_ASYNC_WRITE, LINUX_FADV_WRITE_WAIT: push - * all of the currently dirty pages at the disk, wait until they have been - * written. - * - * It should be noted that none of these operations write out the file's - * metadata. So unless the application is strictly performing overwrites of - * already-instantiated disk blocks, there are no guarantees here that the data - * will be available after a crash. */ asmlinkage long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice) { @@ -129,15 +118,6 @@ asmlinkage long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice) invalidate_mapping_pages(mapping, start_index, end_index); break; - case LINUX_FADV_ASYNC_WRITE: - ret = __filemap_fdatawrite_range(mapping, offset, endbyte, - WB_SYNC_NONE); - break; - case LINUX_FADV_WRITE_WAIT: - ret = wait_on_page_writeback_range(mapping, - offset >> PAGE_CACHE_SHIFT, - endbyte >> PAGE_CACHE_SHIFT); - break; default: ret = -EINVAL; } -- cgit v1.2.3 From 2cf8d82d63807c2c68adf20bb28bf502496186dd Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 31 Mar 2006 02:30:49 -0800 Subject: [PATCH] make local_t signed local_t's were defined to be unsigned. This increases confusion because atomic_t's are signed. The patch goes through and changes all implementations to use signed longs throughout. Also, x86-64 was using 32-bit quantities for the value passed into local_add() and local_sub(). Fixed. All (actually, both) existing users have been audited. 
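As an illustration only (not part of this patch), here is a minimal sketch of why the signedness matters to callers. The per-cpu counter and helper below are invented for the sketch; the local_* operations are the ones declared in the headers touched by this patch.

#include <linux/percpu.h>
#include <asm/local.h>

/* Hypothetical per-cpu statistics counter; the name is made up. */
static DEFINE_PER_CPU(local_t, sample_backlog);

static void sample_backlog_adjust(long delta)
{
	local_t *cnt = &get_cpu_var(sample_backlog);

	local_add(delta, cnt);
	/*
	 * With a signed local_t this test behaves like its atomic_t
	 * counterpart.  With the old unsigned definition local_read()
	 * could never be negative, so the clamp below would be dead code.
	 */
	if (local_read(cnt) < 0)
		local_set(cnt, 0);
	put_cpu_var(sample_backlog);
}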
(Also s/__inline__/inline/ in x86_64/local.h) Cc: Andi Kleen Cc: Benjamin LaHaise Cc: Kyle McMartin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/local.h | 13 ++++++++++--- include/asm-i386/local.h | 6 +++--- include/asm-x86_64/local.h | 10 +++++----- 3 files changed, 18 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/asm-generic/local.h b/include/asm-generic/local.h index de4614840c2c..9291c24f5819 100644 --- a/include/asm-generic/local.h +++ b/include/asm-generic/local.h @@ -7,8 +7,15 @@ #include #include -/* An unsigned long type for operations which are atomic for a single - * CPU. Usually used in combination with per-cpu variables. */ +/* + * A signed long type for operations which are atomic for a single CPU. + * Usually used in combination with per-cpu variables. + * + * This is the default implementation, which uses atomic_long_t. Which is + * rather pointless. The whole point behind local_t is that some processors + * can perform atomic adds and subtracts in a manner which is atomic wrt IRQs + * running on this CPU. local_t allows exploitation of such capabilities. + */ /* Implement in terms of atomics. */ @@ -20,7 +27,7 @@ typedef struct #define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) } -#define local_read(l) ((unsigned long)atomic_long_read(&(l)->a)) +#define local_read(l) atomic_long_read(&(l)->a) #define local_set(l,i) atomic_long_set((&(l)->a),(i)) #define local_inc(l) atomic_long_inc(&(l)->a) #define local_dec(l) atomic_long_dec(&(l)->a) diff --git a/include/asm-i386/local.h b/include/asm-i386/local.h index 0177da80dde3..e67fa08260fe 100644 --- a/include/asm-i386/local.h +++ b/include/asm-i386/local.h @@ -5,7 +5,7 @@ typedef struct { - volatile unsigned long counter; + volatile long counter; } local_t; #define LOCAL_INIT(i) { (i) } @@ -29,7 +29,7 @@ static __inline__ void local_dec(local_t *v) :"m" (v->counter)); } -static __inline__ void local_add(unsigned long i, local_t *v) +static __inline__ void local_add(long i, local_t *v) { __asm__ __volatile__( "addl %1,%0" @@ -37,7 +37,7 @@ static __inline__ void local_add(unsigned long i, local_t *v) :"ir" (i), "m" (v->counter)); } -static __inline__ void local_sub(unsigned long i, local_t *v) +static __inline__ void local_sub(long i, local_t *v) { __asm__ __volatile__( "subl %1,%0" diff --git a/include/asm-x86_64/local.h b/include/asm-x86_64/local.h index bf148037d4e5..cd17945bf218 100644 --- a/include/asm-x86_64/local.h +++ b/include/asm-x86_64/local.h @@ -5,7 +5,7 @@ typedef struct { - volatile unsigned long counter; + volatile long counter; } local_t; #define LOCAL_INIT(i) { (i) } @@ -13,7 +13,7 @@ typedef struct #define local_read(v) ((v)->counter) #define local_set(v,i) (((v)->counter) = (i)) -static __inline__ void local_inc(local_t *v) +static inline void local_inc(local_t *v) { __asm__ __volatile__( "incq %0" @@ -21,7 +21,7 @@ static __inline__ void local_inc(local_t *v) :"m" (v->counter)); } -static __inline__ void local_dec(local_t *v) +static inline void local_dec(local_t *v) { __asm__ __volatile__( "decq %0" @@ -29,7 +29,7 @@ static __inline__ void local_dec(local_t *v) :"m" (v->counter)); } -static __inline__ void local_add(unsigned int i, local_t *v) +static inline void local_add(long i, local_t *v) { __asm__ __volatile__( "addq %1,%0" @@ -37,7 +37,7 @@ static __inline__ void local_add(unsigned int i, local_t *v) :"ir" (i), "m" (v->counter)); } -static __inline__ void local_sub(unsigned int i, local_t *v) +static inline void local_sub(long i, local_t *v) { 
__asm__ __volatile__( "subq %1,%0" -- cgit v1.2.3 From 0ca07731e495584bd84dca15a0f065470d594ec4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 31 Mar 2006 02:30:58 -0800 Subject: [PATCH] vt: add TIOCL_GETKMSGREDIRECT Add TIOCL_GETKMSGREDIRECT needed by the userland suspend tool to get the current value of kmsg_redirect from the kernel so that it can save it and restore it after resume. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/vt.c | 4 ++++ include/linux/tiocl.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/drivers/char/vt.c b/drivers/char/vt.c index ca4844c527da..acc5d47844eb 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -2328,6 +2328,10 @@ int tioclinux(struct tty_struct *tty, unsigned long arg) case TIOCL_SETVESABLANK: set_vesa_blanking(p); break; + case TIOCL_GETKMSGREDIRECT: + data = kmsg_redirect; + ret = __put_user(data, p); + break; case TIOCL_SETKMSGREDIRECT: if (!capable(CAP_SYS_ADMIN)) { ret = -EPERM; diff --git a/include/linux/tiocl.h b/include/linux/tiocl.h index 2c9e847f6ed1..4756862c4ed4 100644 --- a/include/linux/tiocl.h +++ b/include/linux/tiocl.h @@ -34,5 +34,6 @@ struct tiocl_selection { #define TIOCL_SCROLLCONSOLE 13 /* scroll console */ #define TIOCL_BLANKSCREEN 14 /* keep screen blank even if a key is pressed */ #define TIOCL_BLANKEDSCREEN 15 /* return which vt was blanked */ +#define TIOCL_GETKMSGREDIRECT 17 /* get the vt the kernel messages are restricted to */ #endif /* _LINUX_TIOCL_H */ -- cgit v1.2.3 From c72a1d608dd0eb3d553a08bfdf1c0041bebaa8a0 Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Fri, 31 Mar 2006 02:31:04 -0800 Subject: [PATCH] LED: add LED class Add the foundations of a new LEDs subsystem. This patch adds a class which presents LED devices within sysfs and allows their brightness to be controlled. 
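For illustration only (not part of the patch), a minimal sketch of how a driver would hook an LED into this class using the led_classdev interface added in include/linux/leds.h below; the device name, module boilerplate and debug print stand in for real hardware access.

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/leds.h>

/* Hypothetical LED: a real driver would poke a GPIO or register here. */
static void examplehw_led_set(struct led_classdev *led_cdev,
			      enum led_brightness value)
{
	pr_debug("examplehw LED brightness -> %d\n", value);
}

static struct led_classdev examplehw_led = {
	.name		= "examplehw-status",
	.brightness_set	= examplehw_led_set,
};

static int __init examplehw_led_init(void)
{
	/* A real driver would pass its own struct device as the parent. */
	return led_classdev_register(NULL, &examplehw_led);
}

static void __exit examplehw_led_exit(void)
{
	led_classdev_unregister(&examplehw_led);
}

module_init(examplehw_led_init);
module_exit(examplehw_led_exit);
MODULE_LICENSE("GPL");

Once registered, the brightness of such an LED is then exposed as /sys/class/leds/examplehw-status/brightness.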
Signed-off-by: Richard Purdie Cc: Russell King Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/Kconfig | 2 + drivers/Kconfig | 2 + drivers/Makefile | 1 + drivers/leds/Kconfig | 18 ++++++ drivers/leds/Makefile | 4 ++ drivers/leds/led-class.c | 147 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/leds/led-core.c | 25 ++++++++ drivers/leds/leds.h | 31 ++++++++++ include/linux/leds.h | 51 ++++++++++++++++ 9 files changed, 281 insertions(+) create mode 100644 drivers/leds/Kconfig create mode 100644 drivers/leds/Makefile create mode 100644 drivers/leds/led-class.c create mode 100644 drivers/leds/led-core.c create mode 100644 drivers/leds/leds.h create mode 100644 include/linux/leds.h (limited to 'include') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index ba46d779ede7..e91db542eb01 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -839,6 +839,8 @@ source "drivers/misc/Kconfig" source "drivers/mfd/Kconfig" +source "drivers/leds/Kconfig" + source "drivers/media/Kconfig" source "drivers/video/Kconfig" diff --git a/drivers/Kconfig b/drivers/Kconfig index 9f5c0da57c90..5c91d6afb117 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -64,6 +64,8 @@ source "drivers/usb/Kconfig" source "drivers/mmc/Kconfig" +source "drivers/leds/Kconfig" + source "drivers/infiniband/Kconfig" source "drivers/sn/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index 424955274e60..d6e8ffbd8132 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -69,6 +69,7 @@ obj-$(CONFIG_MCA) += mca/ obj-$(CONFIG_EISA) += eisa/ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_MMC) += mmc/ +obj-$(CONFIG_NEW_LEDS) += leds/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_SGI_SN) += sn/ obj-y += firmware/ diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig new file mode 100644 index 000000000000..106f9aeca04f --- /dev/null +++ b/drivers/leds/Kconfig @@ -0,0 +1,18 @@ + +menu "LED devices" + +config NEW_LEDS + bool "LED Support" + help + Say Y to enable Linux LED support. This is not related to standard + keyboard LEDs which are controlled via the input system. + +config LEDS_CLASS + tristate "LED Class Support" + depends NEW_LEDS + help + This option enables the led sysfs class in /sys/class/leds. You'll + need this to do anything useful with LEDs. If unsure, say N. + +endmenu + diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile new file mode 100644 index 000000000000..8a62871437fa --- /dev/null +++ b/drivers/leds/Makefile @@ -0,0 +1,4 @@ + +# LED Core +obj-$(CONFIG_NEW_LEDS) += led-core.o +obj-$(CONFIG_LEDS_CLASS) += led-class.o diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c new file mode 100644 index 000000000000..0292df4101ba --- /dev/null +++ b/drivers/leds/led-class.c @@ -0,0 +1,147 @@ +/* + * LED Class Core + * + * Copyright (C) 2005 John Lenz + * Copyright (C) 2005-2006 Richard Purdie + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "leds.h" + +static struct class *leds_class; + +static ssize_t led_brightness_show(struct class_device *dev, char *buf) +{ + struct led_classdev *led_cdev = class_get_devdata(dev); + ssize_t ret = 0; + + /* no lock needed for this */ + sprintf(buf, "%u\n", led_cdev->brightness); + ret = strlen(buf) + 1; + + return ret; +} + +static ssize_t led_brightness_store(struct class_device *dev, + const char *buf, size_t size) +{ + struct led_classdev *led_cdev = class_get_devdata(dev); + ssize_t ret = -EINVAL; + char *after; + unsigned long state = simple_strtoul(buf, &after, 10); + + if (after - buf > 0) { + ret = after - buf; + led_set_brightness(led_cdev, state); + } + + return ret; +} + +static CLASS_DEVICE_ATTR(brightness, 0644, led_brightness_show, + led_brightness_store); + +/** + * led_classdev_suspend - suspend an led_classdev. + * @led_cdev: the led_classdev to suspend. + */ +void led_classdev_suspend(struct led_classdev *led_cdev) +{ + led_cdev->flags |= LED_SUSPENDED; + led_cdev->brightness_set(led_cdev, 0); +} +EXPORT_SYMBOL_GPL(led_classdev_suspend); + +/** + * led_classdev_resume - resume an led_classdev. + * @led_cdev: the led_classdev to resume. + */ +void led_classdev_resume(struct led_classdev *led_cdev) +{ + led_cdev->brightness_set(led_cdev, led_cdev->brightness); + led_cdev->flags &= ~LED_SUSPENDED; +} +EXPORT_SYMBOL_GPL(led_classdev_resume); + +/** + * led_classdev_register - register a new object of led_classdev class. + * @dev: The device to register. + * @led_cdev: the led_classdev structure for this device. + */ +int led_classdev_register(struct device *parent, struct led_classdev *led_cdev) +{ + led_cdev->class_dev = class_device_create(leds_class, NULL, 0, + parent, "%s", led_cdev->name); + if (unlikely(IS_ERR(led_cdev->class_dev))) + return PTR_ERR(led_cdev->class_dev); + + class_set_devdata(led_cdev->class_dev, led_cdev); + + /* register the attributes */ + class_device_create_file(led_cdev->class_dev, + &class_device_attr_brightness); + + /* add to the list of leds */ + write_lock(&leds_list_lock); + list_add_tail(&led_cdev->node, &leds_list); + write_unlock(&leds_list_lock); + + printk(KERN_INFO "Registered led device: %s\n", + led_cdev->class_dev->class_id); + + return 0; +} +EXPORT_SYMBOL_GPL(led_classdev_register); + +/** + * led_classdev_unregister - unregisters a object of led_properties class. + * @led_cdev: the led device to unreigister + * + * Unregisters a previously registered via led_classdev_register object. 
+ */ +void led_classdev_unregister(struct led_classdev *led_cdev) +{ + class_device_remove_file(led_cdev->class_dev, + &class_device_attr_brightness); + + class_device_unregister(led_cdev->class_dev); + + write_lock(&leds_list_lock); + list_del(&led_cdev->node); + write_unlock(&leds_list_lock); +} +EXPORT_SYMBOL_GPL(led_classdev_unregister); + +static int __init leds_init(void) +{ + leds_class = class_create(THIS_MODULE, "leds"); + if (IS_ERR(leds_class)) + return PTR_ERR(leds_class); + return 0; +} + +static void __exit leds_exit(void) +{ + class_destroy(leds_class); +} + +subsys_initcall(leds_init); +module_exit(leds_exit); + +MODULE_AUTHOR("John Lenz, Richard Purdie"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LED Class Interface"); diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c new file mode 100644 index 000000000000..fe6541326c71 --- /dev/null +++ b/drivers/leds/led-core.c @@ -0,0 +1,25 @@ +/* + * LED Class Core + * + * Copyright 2005-2006 Openedhand Ltd. + * + * Author: Richard Purdie + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include "leds.h" + +rwlock_t leds_list_lock = RW_LOCK_UNLOCKED; +LIST_HEAD(leds_list); + +EXPORT_SYMBOL_GPL(leds_list); +EXPORT_SYMBOL_GPL(leds_list_lock); diff --git a/drivers/leds/leds.h b/drivers/leds/leds.h new file mode 100644 index 000000000000..d26ca2f7e722 --- /dev/null +++ b/drivers/leds/leds.h @@ -0,0 +1,31 @@ +/* + * LED Core + * + * Copyright 2005 Openedhand Ltd. + * + * Author: Richard Purdie + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#ifndef __LEDS_H_INCLUDED +#define __LEDS_H_INCLUDED + +#include + +static inline void led_set_brightness(struct led_classdev *led_cdev, + enum led_brightness value) +{ + if (value > LED_FULL) + value = LED_FULL; + led_cdev->brightness = value; + if (!(led_cdev->flags & LED_SUSPENDED)) + led_cdev->brightness_set(led_cdev, value); +} + +extern rwlock_t leds_list_lock; +extern struct list_head leds_list; + +#endif /* __LEDS_H_INCLUDED */ diff --git a/include/linux/leds.h b/include/linux/leds.h new file mode 100644 index 000000000000..6812640b39cc --- /dev/null +++ b/include/linux/leds.h @@ -0,0 +1,51 @@ +/* + * Driver model for leds and led triggers + * + * Copyright (C) 2005 John Lenz + * Copyright (C) 2005 Richard Purdie + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ +#ifndef __LINUX_LEDS_H_INCLUDED +#define __LINUX_LEDS_H_INCLUDED + +struct device; +struct class_device; +/* + * LED Core + */ + +enum led_brightness { + LED_OFF = 0, + LED_HALF = 127, + LED_FULL = 255, +}; + +struct led_classdev { + const char *name; + int brightness; + int flags; +#define LED_SUSPENDED (1 << 0) + + /* A function to set the brightness of the led */ + void (*brightness_set)(struct led_classdev *led_cdev, + enum led_brightness brightness); + + struct class_device *class_dev; + /* LED Device linked list */ + struct list_head node; + + /* Trigger data */ + char *default_trigger; +}; + +extern int led_classdev_register(struct device *parent, + struct led_classdev *led_cdev); +extern void led_classdev_unregister(struct led_classdev *led_cdev); +extern void led_classdev_suspend(struct led_classdev *led_cdev); +extern void led_classdev_resume(struct led_classdev *led_cdev); + +#endif /* __LINUX_LEDS_H_INCLUDED */ -- cgit v1.2.3 From c3bc9956ec52fb2c70f29aa894d8eec766116584 Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Fri, 31 Mar 2006 02:31:05 -0800 Subject: [PATCH] LED: add LED trigger tupport Add support for LED triggers to the LED subsystem. "Triggers" are events which change the state of an LED. Two kinds of trigger are available, simple ones which can be added to exising code with minimum disruption and complex ones for implementing new or more complex functionality. Signed-off-by: Richard Purdie Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/leds/Kconfig | 8 ++ drivers/leds/Makefile | 1 + drivers/leds/led-class.c | 20 ++++ drivers/leds/led-triggers.c | 239 ++++++++++++++++++++++++++++++++++++++++++++ drivers/leds/leds.h | 13 +++ include/linux/leds.h | 52 ++++++++++ 6 files changed, 333 insertions(+) create mode 100644 drivers/leds/led-triggers.c (limited to 'include') diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index 106f9aeca04f..a4d12ecaf6a5 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -14,5 +14,13 @@ config LEDS_CLASS This option enables the led sysfs class in /sys/class/leds. You'll need this to do anything useful with LEDs. If unsure, say N. +config LEDS_TRIGGERS + bool "LED Trigger support" + depends NEW_LEDS + help + This option enables trigger support for the leds class. + These triggers allow kernel events to drive the LEDs and can + be configured via sysfs. If unsure, say Y. + endmenu diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile index 8a62871437fa..a9d8becd6189 100644 --- a/drivers/leds/Makefile +++ b/drivers/leds/Makefile @@ -2,3 +2,4 @@ # LED Core obj-$(CONFIG_NEW_LEDS) += led-core.o obj-$(CONFIG_LEDS_CLASS) += led-class.o +obj-$(CONFIG_LEDS_TRIGGERS) += led-triggers.o diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index 0292df4101ba..b0b5d05fadd6 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -54,6 +54,9 @@ static ssize_t led_brightness_store(struct class_device *dev, static CLASS_DEVICE_ATTR(brightness, 0644, led_brightness_show, led_brightness_store); +#ifdef CONFIG_LEDS_TRIGGERS +static CLASS_DEVICE_ATTR(trigger, 0644, led_trigger_show, led_trigger_store); +#endif /** * led_classdev_suspend - suspend an led_classdev. 
@@ -100,6 +103,15 @@ int led_classdev_register(struct device *parent, struct led_classdev *led_cdev) list_add_tail(&led_cdev->node, &leds_list); write_unlock(&leds_list_lock); +#ifdef CONFIG_LEDS_TRIGGERS + rwlock_init(&led_cdev->trigger_lock); + + led_trigger_set_default(led_cdev); + + class_device_create_file(led_cdev->class_dev, + &class_device_attr_trigger); +#endif + printk(KERN_INFO "Registered led device: %s\n", led_cdev->class_dev->class_id); @@ -117,6 +129,14 @@ void led_classdev_unregister(struct led_classdev *led_cdev) { class_device_remove_file(led_cdev->class_dev, &class_device_attr_brightness); +#ifdef CONFIG_LEDS_TRIGGERS + class_device_remove_file(led_cdev->class_dev, + &class_device_attr_trigger); + write_lock(&led_cdev->trigger_lock); + if (led_cdev->trigger) + led_trigger_set(led_cdev, NULL); + write_unlock(&led_cdev->trigger_lock); +#endif class_device_unregister(led_cdev->class_dev); diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c new file mode 100644 index 000000000000..5e2cd8be1191 --- /dev/null +++ b/drivers/leds/led-triggers.c @@ -0,0 +1,239 @@ +/* + * LED Triggers Core + * + * Copyright 2005-2006 Openedhand Ltd. + * + * Author: Richard Purdie + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "leds.h" + +/* + * Nests outside led_cdev->trigger_lock + */ +static rwlock_t triggers_list_lock = RW_LOCK_UNLOCKED; +static LIST_HEAD(trigger_list); + +ssize_t led_trigger_store(struct class_device *dev, const char *buf, + size_t count) +{ + struct led_classdev *led_cdev = class_get_devdata(dev); + char trigger_name[TRIG_NAME_MAX]; + struct led_trigger *trig; + size_t len; + + trigger_name[sizeof(trigger_name) - 1] = '\0'; + strncpy(trigger_name, buf, sizeof(trigger_name) - 1); + len = strlen(trigger_name); + + if (len && trigger_name[len - 1] == '\n') + trigger_name[len - 1] = '\0'; + + if (!strcmp(trigger_name, "none")) { + write_lock(&led_cdev->trigger_lock); + led_trigger_set(led_cdev, NULL); + write_unlock(&led_cdev->trigger_lock); + return count; + } + + read_lock(&triggers_list_lock); + list_for_each_entry(trig, &trigger_list, next_trig) { + if (!strcmp(trigger_name, trig->name)) { + write_lock(&led_cdev->trigger_lock); + led_trigger_set(led_cdev, trig); + write_unlock(&led_cdev->trigger_lock); + + read_unlock(&triggers_list_lock); + return count; + } + } + read_unlock(&triggers_list_lock); + + return -EINVAL; +} + + +ssize_t led_trigger_show(struct class_device *dev, char *buf) +{ + struct led_classdev *led_cdev = class_get_devdata(dev); + struct led_trigger *trig; + int len = 0; + + read_lock(&triggers_list_lock); + read_lock(&led_cdev->trigger_lock); + + if (!led_cdev->trigger) + len += sprintf(buf+len, "[none] "); + else + len += sprintf(buf+len, "none "); + + list_for_each_entry(trig, &trigger_list, next_trig) { + if (led_cdev->trigger && !strcmp(led_cdev->trigger->name, + trig->name)) + len += sprintf(buf+len, "[%s] ", trig->name); + else + len += sprintf(buf+len, "%s ", trig->name); + } + read_unlock(&led_cdev->trigger_lock); + read_unlock(&triggers_list_lock); + + len += sprintf(len+buf, "\n"); + return len; +} + +void led_trigger_event(struct led_trigger *trigger, + enum led_brightness brightness) +{ + struct list_head *entry; + + if (!trigger) + return; + + 
read_lock(&trigger->leddev_list_lock); + list_for_each(entry, &trigger->led_cdevs) { + struct led_classdev *led_cdev; + + led_cdev = list_entry(entry, struct led_classdev, trig_list); + led_set_brightness(led_cdev, brightness); + } + read_unlock(&trigger->leddev_list_lock); +} + +/* Caller must ensure led_cdev->trigger_lock held */ +void led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trigger) +{ + unsigned long flags; + + /* Remove any existing trigger */ + if (led_cdev->trigger) { + write_lock_irqsave(&led_cdev->trigger->leddev_list_lock, flags); + list_del(&led_cdev->trig_list); + write_unlock_irqrestore(&led_cdev->trigger->leddev_list_lock, flags); + if (led_cdev->trigger->deactivate) + led_cdev->trigger->deactivate(led_cdev); + } + if (trigger) { + write_lock_irqsave(&trigger->leddev_list_lock, flags); + list_add_tail(&led_cdev->trig_list, &trigger->led_cdevs); + write_unlock_irqrestore(&trigger->leddev_list_lock, flags); + if (trigger->activate) + trigger->activate(led_cdev); + } + led_cdev->trigger = trigger; +} + +void led_trigger_set_default(struct led_classdev *led_cdev) +{ + struct led_trigger *trig; + + if (!led_cdev->default_trigger) + return; + + read_lock(&triggers_list_lock); + write_lock(&led_cdev->trigger_lock); + list_for_each_entry(trig, &trigger_list, next_trig) { + if (!strcmp(led_cdev->default_trigger, trig->name)) + led_trigger_set(led_cdev, trig); + } + write_unlock(&led_cdev->trigger_lock); + read_unlock(&triggers_list_lock); +} + +int led_trigger_register(struct led_trigger *trigger) +{ + struct led_classdev *led_cdev; + + rwlock_init(&trigger->leddev_list_lock); + INIT_LIST_HEAD(&trigger->led_cdevs); + + /* Add to the list of led triggers */ + write_lock(&triggers_list_lock); + list_add_tail(&trigger->next_trig, &trigger_list); + write_unlock(&triggers_list_lock); + + /* Register with any LEDs that have this as a default trigger */ + read_lock(&leds_list_lock); + list_for_each_entry(led_cdev, &leds_list, node) { + write_lock(&led_cdev->trigger_lock); + if (!led_cdev->trigger && led_cdev->default_trigger && + !strcmp(led_cdev->default_trigger, trigger->name)) + led_trigger_set(led_cdev, trigger); + write_unlock(&led_cdev->trigger_lock); + } + read_unlock(&leds_list_lock); + + return 0; +} + +void led_trigger_register_simple(const char *name, struct led_trigger **tp) +{ + struct led_trigger *trigger; + + trigger = kzalloc(sizeof(struct led_trigger), GFP_KERNEL); + + if (trigger) { + trigger->name = name; + led_trigger_register(trigger); + } + *tp = trigger; +} + +void led_trigger_unregister(struct led_trigger *trigger) +{ + struct led_classdev *led_cdev; + + /* Remove from the list of led triggers */ + write_lock(&triggers_list_lock); + list_del(&trigger->next_trig); + write_unlock(&triggers_list_lock); + + /* Remove anyone actively using this trigger */ + read_lock(&leds_list_lock); + list_for_each_entry(led_cdev, &leds_list, node) { + write_lock(&led_cdev->trigger_lock); + if (led_cdev->trigger == trigger) + led_trigger_set(led_cdev, NULL); + write_unlock(&led_cdev->trigger_lock); + } + read_unlock(&leds_list_lock); +} + +void led_trigger_unregister_simple(struct led_trigger *trigger) +{ + led_trigger_unregister(trigger); + kfree(trigger); +} + +/* Used by LED Class */ +EXPORT_SYMBOL_GPL(led_trigger_set); +EXPORT_SYMBOL_GPL(led_trigger_set_default); +EXPORT_SYMBOL_GPL(led_trigger_show); +EXPORT_SYMBOL_GPL(led_trigger_store); + +/* LED Trigger Interface */ +EXPORT_SYMBOL_GPL(led_trigger_register); +EXPORT_SYMBOL_GPL(led_trigger_unregister); + 
+/* Simple LED Tigger Interface */ +EXPORT_SYMBOL_GPL(led_trigger_register_simple); +EXPORT_SYMBOL_GPL(led_trigger_unregister_simple); +EXPORT_SYMBOL_GPL(led_trigger_event); + +MODULE_AUTHOR("Richard Purdie"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LED Triggers Core"); diff --git a/drivers/leds/leds.h b/drivers/leds/leds.h index d26ca2f7e722..a715c4ed93ff 100644 --- a/drivers/leds/leds.h +++ b/drivers/leds/leds.h @@ -28,4 +28,17 @@ static inline void led_set_brightness(struct led_classdev *led_cdev, extern rwlock_t leds_list_lock; extern struct list_head leds_list; +#ifdef CONFIG_LEDS_TRIGGERS +void led_trigger_set_default(struct led_classdev *led_cdev); +void led_trigger_set(struct led_classdev *led_cdev, + struct led_trigger *trigger); +#else +#define led_trigger_set_default(x) do {} while(0) +#define led_trigger_set(x, y) do {} while(0) +#endif + +ssize_t led_trigger_store(struct class_device *dev, const char *buf, + size_t count); +ssize_t led_trigger_show(struct class_device *dev, char *buf); + #endif /* __LEDS_H_INCLUDED */ diff --git a/include/linux/leds.h b/include/linux/leds.h index 6812640b39cc..404575c3dd5a 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -40,6 +40,14 @@ struct led_classdev { /* Trigger data */ char *default_trigger; +#ifdef CONFIG_LEDS_TRIGGERS + rwlock_t trigger_lock; + /* Protects the trigger data below */ + + struct led_trigger *trigger; + struct list_head trig_list; + void *trigger_data; +#endif }; extern int led_classdev_register(struct device *parent, @@ -48,4 +56,48 @@ extern void led_classdev_unregister(struct led_classdev *led_cdev); extern void led_classdev_suspend(struct led_classdev *led_cdev); extern void led_classdev_resume(struct led_classdev *led_cdev); +/* + * LED Triggers + */ +#ifdef CONFIG_LEDS_TRIGGERS + +#define TRIG_NAME_MAX 50 + +struct led_trigger { + /* Trigger Properties */ + const char *name; + void (*activate)(struct led_classdev *led_cdev); + void (*deactivate)(struct led_classdev *led_cdev); + + /* LEDs under control by this trigger (for simple triggers) */ + rwlock_t leddev_list_lock; + struct list_head led_cdevs; + + /* Link to next registered trigger */ + struct list_head next_trig; +}; + +/* Registration functions for complex triggers */ +extern int led_trigger_register(struct led_trigger *trigger); +extern void led_trigger_unregister(struct led_trigger *trigger); + +/* Registration functions for simple triggers */ +#define DEFINE_LED_TRIGGER(x) static struct led_trigger *x; +#define DEFINE_LED_TRIGGER_GLOBAL(x) struct led_trigger *x; +extern void led_trigger_register_simple(const char *name, + struct led_trigger **trigger); +extern void led_trigger_unregister_simple(struct led_trigger *trigger); +extern void led_trigger_event(struct led_trigger *trigger, + enum led_brightness event); + +#else + +/* Triggers aren't active - null macros */ +#define DEFINE_LED_TRIGGER(x) +#define DEFINE_LED_TRIGGER_GLOBAL(x) +#define led_trigger_register_simple(x, y) do {} while(0) +#define led_trigger_unregister_simple(x) do {} while(0) +#define led_trigger_event(x, y) do {} while(0) + +#endif #endif /* __LINUX_LEDS_H_INCLUDED */ -- cgit v1.2.3 From 2bfb646cdf348cb77c572f06d5b9d17ea205c7e2 Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Fri, 31 Mar 2006 02:31:16 -0800 Subject: [PATCH] LED: Add IDE disk activity LED trigger Add an LED trigger for IDE disk activity to the ide-disk driver. 
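For context only (not part of this patch), a sketch of the consumer side, reusing the hypothetical driver from the LED class example earlier: an LED can be bound to this trigger either at registration time through default_trigger, or later from userspace via the trigger attribute added by the previous patch.

#include <linux/leds.h>

static void examplehw_disk_led_set(struct led_classdev *led_cdev,
				   enum led_brightness value)
{
	/* A real driver would drive the LED line here. */
}

/* Hypothetical disk-activity LED bound to the "ide-disk" trigger by default. */
static struct led_classdev examplehw_disk_led = {
	.name			= "examplehw-disk",
	.brightness_set		= examplehw_disk_led_set,
	.default_trigger	= "ide-disk",	/* picked up by led_trigger_set_default() */
};

/*
 * Or, at run time from userspace:
 *   echo ide-disk > /sys/class/leds/examplehw-disk/trigger
 *   cat /sys/class/leds/examplehw-disk/trigger
 */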
Signed-off-by: Richard Purdie Acked-by: Bartlomiej Zolnierkiewicz Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/ide/ide-disk.c | 3 ++ drivers/leds/Kconfig | 7 +++++ drivers/leds/Makefile | 1 + drivers/leds/ledtrig-ide-disk.c | 62 +++++++++++++++++++++++++++++++++++++++++ include/linux/leds.h | 8 ++++++ 5 files changed, 81 insertions(+) create mode 100644 drivers/leds/ledtrig-ide-disk.c (limited to 'include') diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index ccf528d733bf..a5017de72da5 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -61,6 +61,7 @@ #include #include #include +#include #define _IDE_DISK @@ -317,6 +318,8 @@ static ide_startstop_t ide_do_rw_disk (ide_drive_t *drive, struct request *rq, s return ide_stopped; } + ledtrig_ide_activity(); + pr_debug("%s: %sing: block=%llu, sectors=%lu, buffer=0x%08lx\n", drive->name, rq_data_dir(rq) == READ ? "read" : "writ", (unsigned long long)block, rq->nr_sectors, diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index 64dcdd3185c0..2c4f20b7f021 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -66,5 +66,12 @@ config LEDS_TRIGGER_TIMER This allows LEDs to be controlled by a programmable timer via sysfs. If unsure, say Y. +config LEDS_TRIGGER_IDE_DISK + bool "LED Timer Trigger" + depends LEDS_TRIGGERS && BLK_DEV_IDEDISK + help + This allows LEDs to be controlled by IDE disk activity. + If unsure, say Y. + endmenu diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile index 9d2930f89d03..40699d3cabbf 100644 --- a/drivers/leds/Makefile +++ b/drivers/leds/Makefile @@ -13,3 +13,4 @@ obj-$(CONFIG_LEDS_TOSA) += leds-tosa.o # LED Triggers obj-$(CONFIG_LEDS_TRIGGER_TIMER) += ledtrig-timer.o +obj-$(CONFIG_LEDS_TRIGGER_IDE_DISK) += ledtrig-ide-disk.o diff --git a/drivers/leds/ledtrig-ide-disk.c b/drivers/leds/ledtrig-ide-disk.c new file mode 100644 index 000000000000..fa651886ab4f --- /dev/null +++ b/drivers/leds/ledtrig-ide-disk.c @@ -0,0 +1,62 @@ +/* + * LED IDE-Disk Activity Trigger + * + * Copyright 2006 Openedhand Ltd. + * + * Author: Richard Purdie + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#include +#include +#include +#include +#include + +static void ledtrig_ide_timerfunc(unsigned long data); + +DEFINE_LED_TRIGGER(ledtrig_ide); +static DEFINE_TIMER(ledtrig_ide_timer, ledtrig_ide_timerfunc, 0, 0); +static int ide_activity; +static int ide_lastactivity; + +void ledtrig_ide_activity(void) +{ + ide_activity++; + if (!timer_pending(&ledtrig_ide_timer)) + mod_timer(&ledtrig_ide_timer, jiffies + msecs_to_jiffies(10)); +} +EXPORT_SYMBOL(ledtrig_ide_activity); + +static void ledtrig_ide_timerfunc(unsigned long data) +{ + if (ide_lastactivity != ide_activity) { + ide_lastactivity = ide_activity; + led_trigger_event(ledtrig_ide, LED_FULL); + mod_timer(&ledtrig_ide_timer, jiffies + msecs_to_jiffies(10)); + } else { + led_trigger_event(ledtrig_ide, LED_OFF); + } +} + +static int __init ledtrig_ide_init(void) +{ + led_trigger_register_simple("ide-disk", &ledtrig_ide); + return 0; +} + +static void __exit ledtrig_ide_exit(void) +{ + led_trigger_unregister_simple(ledtrig_ide); +} + +module_init(ledtrig_ide_init); +module_exit(ledtrig_ide_exit); + +MODULE_AUTHOR("Richard Purdie "); +MODULE_DESCRIPTION("LED IDE Disk Activity Trigger"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/leds.h b/include/linux/leds.h index 404575c3dd5a..4617e75903b0 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -100,4 +100,12 @@ extern void led_trigger_event(struct led_trigger *trigger, #define led_trigger_event(x, y) do {} while(0) #endif + +/* Trigger specific functions */ +#ifdef CONFIG_LEDS_TRIGGER_IDE_DISK +extern void ledtrig_ide_activity(void); +#else +#define ledtrig_ide_activity() do {} while(0) +#endif + #endif /* __LINUX_LEDS_H_INCLUDED */ -- cgit v1.2.3 From 00362e33f65f1cb5d15e62ea5509520ce2770360 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 31 Mar 2006 02:31:17 -0800 Subject: [PATCH] hrtimer: create generic sleeper The removal of the data field in the hrtimer structure enforces the embedding of the timer into another data structure. nanosleep now uses a private implementation of the most common used timer callback function (simple task wakeup). In order to avoid the reimplentation of such functionality all over the place a generic hrtimer_sleeper functionality is created. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hrtimer.h | 16 ++++++++++++++++ kernel/hrtimer.c | 19 +++++++++++++++++++ 2 files changed, 35 insertions(+) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 93830158348e..b20939287613 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -57,6 +57,19 @@ struct hrtimer { struct hrtimer_base *base; }; +/** + * struct hrtimer_sleeper - simple sleeper structure + * + * @timer: embedded timer structure + * @task: task to wake up + * + * task is set to NULL, when the timer expires. 
+ */ +struct hrtimer_sleeper { + struct hrtimer timer; + struct task_struct *task; +}; + /** * struct hrtimer_base - the timer base for a specific clock * @@ -127,6 +140,9 @@ extern long hrtimer_nanosleep(struct timespec *rqtp, const enum hrtimer_mode mode, const clockid_t clockid); +extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, + struct task_struct *tsk); + /* Soft interrupt function to run the hrtimer queues: */ extern void hrtimer_run_queues(void); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 0237a556eb1f..877cdf9678bf 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -656,6 +656,25 @@ void hrtimer_run_queues(void) * Sleep related functions: */ +static int hrtimer_wakeup(struct hrtimer *timer) +{ + struct hrtimer_sleeper *t = + container_of(timer, struct hrtimer_sleeper, timer); + struct task_struct *task = t->task; + + t->task = NULL; + if (task) + wake_up_process(task); + + return HRTIMER_NORESTART; +} + +void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, task_t *task) +{ + sl->timer.function = hrtimer_wakeup; + sl->task = task; +} + struct sleep_hrtimer { struct hrtimer timer; struct task_struct *task; -- cgit v1.2.3 From db1b1fefc2cecbff2e4214062fa8c680cb6e7b7d Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Fri, 31 Mar 2006 02:31:21 -0800 Subject: [PATCH] sched: reduce overhead of calc_load Currently, count_active_tasks() calls both nr_running() & nr_uninterruptible(). Each of these functions does a "for_each_cpu" & reads values from the runqueue of each cpu. Although this is not a lot of instructions, each runqueue may be located on a different node. Depending on the architecture, a unique TLB entry may be required to access each runqueue. Since there may be more runqueues than cpu TLB entries, a scan of all runqueues can trash the TLB. Each memory reference incurs a TLB miss & refill. In addition, the runqueue cacheline that contains nr_running & nr_uninterruptible may be evicted from the cache between the two passes. This causes unnecessary cache misses. Combining nr_running() & nr_uninterruptible() into a single function substantially reduces the TLB & cache misses on large systems. This should have no measurable effect on smaller systems. On a 128p IA64 system running a memory stress workload, the new function reduced the overhead of calc_load() from 605 usec/call to 324 usec/call. 
Signed-off-by: Jack Steiner Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + kernel/sched.c | 15 +++++++++++++++ kernel/timer.c | 2 +- 3 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index d04186d8cc68..ab84adf5bb9a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -100,6 +100,7 @@ DECLARE_PER_CPU(unsigned long, process_counts); extern int nr_processes(void); extern unsigned long nr_running(void); extern unsigned long nr_uninterruptible(void); +extern unsigned long nr_active(void); extern unsigned long nr_iowait(void); #include diff --git a/kernel/sched.c b/kernel/sched.c index a9ecac398bb9..6e52e0adff80 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1658,6 +1658,21 @@ unsigned long nr_iowait(void) return sum; } +unsigned long nr_active(void) +{ + unsigned long i, running = 0, uninterruptible = 0; + + for_each_online_cpu(i) { + running += cpu_rq(i)->nr_running; + uninterruptible += cpu_rq(i)->nr_uninterruptible; + } + + if (unlikely((long)uninterruptible < 0)) + uninterruptible = 0; + + return running + uninterruptible; +} + #ifdef CONFIG_SMP /* diff --git a/kernel/timer.c b/kernel/timer.c index 9062a82ee8ec..6b812c04737b 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -825,7 +825,7 @@ void update_process_times(int user_tick) */ static unsigned long count_active_tasks(void) { - return (nr_running() + nr_uninterruptible()) * FIXED_1; + return nr_active() * FIXED_1; } /* -- cgit v1.2.3 From 3dee386e14045484a6c41c8f03a263f9d79de740 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Fri, 31 Mar 2006 02:31:23 -0800 Subject: [PATCH] sched: cleanup task_activated() The activated flag in task_struct is used to track different sleep types and its usage is somewhat obfuscated. Convert the variable to an enum with more descriptive names without altering the function. Signed-off-by: Con Kolivas Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 9 ++++++++- kernel/sched.c | 24 +++++++++++++++--------- 2 files changed, 23 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index ab84adf5bb9a..c4fd3fcd3feb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -684,6 +684,13 @@ static inline void prefetch_stack(struct task_struct *t) { } struct audit_context; /* See audit.c */ struct mempolicy; +enum sleep_type { + SLEEP_NORMAL, + SLEEP_NONINTERACTIVE, + SLEEP_INTERACTIVE, + SLEEP_INTERRUPTED, +}; + struct task_struct { volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ struct thread_info *thread_info; @@ -706,7 +713,7 @@ struct task_struct { unsigned long sleep_avg; unsigned long long timestamp, last_ran; unsigned long long sched_time; /* sched_clock time spent running */ - int activated; + enum sleep_type sleep_type; unsigned long policy; cpumask_t cpus_allowed; diff --git a/kernel/sched.c b/kernel/sched.c index 6e52e0adff80..f55ce5adac55 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -704,7 +704,7 @@ static int recalc_task_prio(task_t *p, unsigned long long now) * prevent them suddenly becoming cpu hogs and starving * other processes. 
*/ - if (p->mm && p->activated != -1 && + if (p->mm && p->sleep_type != SLEEP_NONINTERACTIVE && sleep_time > INTERACTIVE_SLEEP(p)) { p->sleep_avg = JIFFIES_TO_NS(MAX_SLEEP_AVG - DEF_TIMESLICE); @@ -714,7 +714,7 @@ static int recalc_task_prio(task_t *p, unsigned long long now) * limited in their sleep_avg rise as they * are likely to be waiting on I/O */ - if (p->activated == -1 && p->mm) { + if (p->sleep_type == SLEEP_NONINTERACTIVE && p->mm) { if (p->sleep_avg >= INTERACTIVE_SLEEP(p)) sleep_time = 0; else if (p->sleep_avg + sleep_time >= @@ -769,7 +769,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local) * This checks to make sure it's not an uninterruptible task * that is now waking up. */ - if (!p->activated) { + if (p->sleep_type == SLEEP_NORMAL) { /* * Tasks which were woken up by interrupts (ie. hw events) * are most likely of interactive nature. So we give them @@ -778,13 +778,13 @@ static void activate_task(task_t *p, runqueue_t *rq, int local) * on a CPU, first time around: */ if (in_interrupt()) - p->activated = 2; + p->sleep_type = SLEEP_INTERRUPTED; else { /* * Normal first-time wakeups get a credit too for * on-runqueue time, but it will be weighted down: */ - p->activated = 1; + p->sleep_type = SLEEP_INTERACTIVE; } } p->timestamp = now; @@ -1272,7 +1272,7 @@ out_activate: * Tasks on involuntary sleep don't earn * sleep_avg beyond just interactive state. */ - p->activated = -1; + p->sleep_type = SLEEP_NONINTERACTIVE; } /* @@ -2875,6 +2875,12 @@ EXPORT_SYMBOL(sub_preempt_count); #endif +static inline int interactive_sleep(enum sleep_type sleep_type) +{ + return (sleep_type == SLEEP_INTERACTIVE || + sleep_type == SLEEP_INTERRUPTED); +} + /* * schedule() is the main scheduler function. */ @@ -2998,12 +3004,12 @@ go_idle: queue = array->queue + idx; next = list_entry(queue->next, task_t, run_list); - if (!rt_task(next) && next->activated > 0) { + if (!rt_task(next) && interactive_sleep(next->sleep_type)) { unsigned long long delta = now - next->timestamp; if (unlikely((long long)(now - next->timestamp) < 0)) delta = 0; - if (next->activated == 1) + if (next->sleep_type == SLEEP_INTERACTIVE) delta = delta * (ON_RUNQUEUE_WEIGHT * 128 / 100) / 128; array = next->array; @@ -3016,7 +3022,7 @@ go_idle: } else requeue_task(next, array); } - next->activated = 0; + next->sleep_type = SLEEP_NORMAL; switch_tasks: if (next == rq->idle) schedstat_inc(rq, sched_goidle); -- cgit v1.2.3 From d425b274ba83ba4e7746a40446ec0ba3267de51f Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Fri, 31 Mar 2006 02:31:29 -0800 Subject: [PATCH] sched: activate SCHED BATCH expired To increase the strength of SCHED_BATCH as a scheduling hint we can activate batch tasks on the expired array since by definition they are latency insensitive tasks. Signed-off-by: Con Kolivas Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + kernel/sched.c | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index c4fd3fcd3feb..78c40dd2e19a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -484,6 +484,7 @@ struct signal_struct { #define MAX_PRIO (MAX_RT_PRIO + 40) #define rt_task(p) (unlikely((p)->prio < MAX_RT_PRIO)) +#define batch_task(p) (unlikely((p)->policy == SCHED_BATCH)) /* * Some day this will be a full-fledged user tracking system.. 
diff --git a/kernel/sched.c b/kernel/sched.c index 73bb4d9ef989..dd153d6f8a04 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -667,9 +667,13 @@ static int effective_prio(task_t *p) /* * __activate_task - move a task to the runqueue. */ -static inline void __activate_task(task_t *p, runqueue_t *rq) +static void __activate_task(task_t *p, runqueue_t *rq) { - enqueue_task(p, rq->active); + prio_array_t *target = rq->active; + + if (batch_task(p)) + target = rq->expired; + enqueue_task(p, target); rq->nr_running++; } @@ -688,7 +692,7 @@ static int recalc_task_prio(task_t *p, unsigned long long now) unsigned long long __sleep_time = now - p->timestamp; unsigned long sleep_time; - if (unlikely(p->policy == SCHED_BATCH)) + if (batch_task(p)) sleep_time = 0; else { if (__sleep_time > NS_MAX_SLEEP_AVG) -- cgit v1.2.3 From 158d9ebd19280582da172626ad3edda1a626dace Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 31 Mar 2006 02:31:34 -0800 Subject: [PATCH] resurrect __put_task_struct This just got nuked in mainline. Bring it back because Eric's patches use it. Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + kernel/fork.c | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 78c40dd2e19a..95f248ba36c9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -906,6 +906,7 @@ extern void free_task(struct task_struct *tsk); #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) extern void __put_task_struct_cb(struct rcu_head *rhp); +extern void __put_task_struct(struct task_struct *t); static inline void put_task_struct(struct task_struct *t) { diff --git a/kernel/fork.c b/kernel/fork.c index b3f7a1bb5e55..b1341205be27 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -108,10 +108,8 @@ void free_task(struct task_struct *tsk) } EXPORT_SYMBOL(free_task); -void __put_task_struct_cb(struct rcu_head *rhp) +void __put_task_struct(struct task_struct *tsk) { - struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); - WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE))); WARN_ON(atomic_read(&tsk->usage)); WARN_ON(tsk == current); @@ -126,6 +124,12 @@ void __put_task_struct_cb(struct rcu_head *rhp) free_task(tsk); } +void __put_task_struct_cb(struct rcu_head *rhp) +{ + struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); + __put_task_struct(tsk); +} + void __init fork_init(unsigned long mempages) { #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR -- cgit v1.2.3 From 8c7904a00b06d2ee51149794b619e07369fcf9d4 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 31 Mar 2006 02:31:37 -0800 Subject: [PATCH] task: RCU protect task->usage A big problem with rcu protected data structures that are also reference counted is that you must jump through several hoops to increase the reference count. I think someone finally implemented atomic_inc_not_zero(&count) to automate the common case. Unfortunately this means you must special case the rcu access case. When data structures are only visible via rcu in a manner that is not determined by the reference count on the object (i.e. tasks are visible until their zombies are reaped) there is a much simpler technique we can employ. Simply delaying the decrement of the reference count until the rcu interval is over. 
What that means is that the proc code that looks up a task and later wants to sleep can now do: rcu_read_lock(); task = find_task_by_pid(some_pid); if (task) { get_task_struct(task); } rcu_read_unlock(); The effect on the rest of the kernel is that put_task_struct becomes cheaper and immediate, and in the case where the task has been reaped it frees the task immediately instead of unnecessarily waiting until the rcu interval is over. Cleanup of task_struct does not happen when its reference count drops to zero; instead, cleanup happens when release_task is called. Tasks can only be looked up via rcu before release_task is called. All rcu protected members of task_struct are freed by release_task. Therefore we can move call_rcu from put_task_struct into release_task. And we can modify release_task to not immediately release the reference count but instead have it call put_task_struct from the function it gives to call_rcu. The end result: - get_task_struct is safe in an rcu context where we have just looked up the task. - put_task_struct() simplifies into its old pre-rcu self. This reorganization also makes put_task_struct uncallable from modules, as it is not exported, but it does not appear to be called from any modules, so this should not be an issue and is trivially fixed. Signed-off-by: Eric W. Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 +- kernel/exit.c | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 95f248ba36c9..7e0ff5dba986 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -911,7 +911,7 @@ extern void __put_task_struct(struct task_struct *t); static inline void put_task_struct(struct task_struct *t) { if (atomic_dec_and_test(&t->usage)) - call_rcu(&t->rcu, __put_task_struct_cb); + __put_task_struct(t); } /* diff --git a/kernel/exit.c b/kernel/exit.c index bc0ec674d3f4..6c2eeb8f6390 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -127,6 +127,11 @@ static void __exit_signal(struct task_struct *tsk) } } +static void delayed_put_task_struct(struct rcu_head *rhp) +{ + put_task_struct(container_of(rhp, struct task_struct, rcu)); +} + void release_task(struct task_struct * p) { int zap_leader; @@ -168,7 +173,7 @@ repeat: spin_unlock(&p->proc_lock); proc_pid_flush(proc_dentry); release_thread(p); - put_task_struct(p); + call_rcu(&p->rcu, delayed_put_task_struct); p = leader; if (unlikely(zap_leader)) -- cgit v1.2.3 From 92476d7fc0326a409ab1d3864a04093a6be9aca7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 31 Mar 2006 02:31:42 -0800 Subject: [PATCH] pidhash: Refactor the pid hash table Simplifies the code, reduces the need for 4 pid hash tables, and makes the code more capable. In the discussions I had with Oleg it was felt that to a large extent the cleanup itself justified the work. Dynamically allocating struct pid meant we could create the hash table entry when the pid was allocated and free the hash table entry when the pid was freed, instead of playing with the hash lists whenever a process would attach or detach. For myself the fact that it gave what my previous task_ref patch gave for free with simpler code was a big win. The problem is that if you hold a reference to struct task_struct you lock in 10K of low memory. 
If you do that in a user controllable way like /proc does, with an unprivileged but hostile user space application with typical resource limits of 1000 fds and 100 processes, I can trigger the OOM killer by consuming all of low memory with task structs, on a machine with 1GB of low memory. If I instead hold a reference to struct pid which holds a pointer to my task_struct, I don't suffer from that problem because struct pid is 2 orders of magnitude smaller. In fact struct pid is small enough that most other kernel data structures dwarf it, so simply limiting the number of referring data structures is enough to prevent exhaustion of low memory. This splits the current struct pid into two structures, struct pid and struct pid_link, and reduces our number of hash tables from PIDTYPE_MAX to just one. struct pid_link is the per process linkage into the hash tables and lives in struct task_struct. struct pid is given an independent lifetime, and holds pointers to each of the pid types. The independent life of struct pid simplifies attach_pid, and detach_pid, because we are always manipulating the list of pids and not the hash table. In addition, giving struct pid an independent life makes the concept much more powerful. Kernel data structures can now embed a struct pid * instead of a pid_t and not suffer from pid wrap around problems or from keeping unnecessarily large amounts of memory allocated. Signed-off-by: Eric W. Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pid.h | 96 +++++++++++++++++++---- include/linux/sched.h | 4 +- kernel/fork.c | 16 ++-- kernel/pid.c | 212 ++++++++++++++++++++++++++++++++++---------------- 4 files changed, 238 insertions(+), 90 deletions(-) (limited to 'include') diff --git a/include/linux/pid.h b/include/linux/pid.h index 5b9082cc600f..29960b03bef7 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -1,6 +1,8 @@ #ifndef _LINUX_PID_H #define _LINUX_PID_H +#include + enum pid_type { PIDTYPE_PID, @@ -9,45 +11,109 @@ enum pid_type PIDTYPE_MAX }; +/* + * What is struct pid? + * + * A struct pid is the kernel's internal notion of a process identifier. + * It refers to individual tasks, process groups, and sessions. While + * there are processes attached to it the struct pid lives in a hash + * table, so it and then the processes that it refers to can be found + * quickly from the numeric pid value. The attached processes may be + * quickly accessed by following pointers from struct pid. + * + * Storing pid_t values in the kernel and refering to them later has a + * problem. The process originally with that pid may have exited and the + * pid allocator wrapped, and another process could have come along + * and been assigned that pid. + * + * Referring to user space processes by holding a reference to struct + * task_struct has a problem. When the user space process exits + * the now useless task_struct is still kept. A task_struct plus a + * stack consumes around 10K of low kernel memory. More precisely + * this is THREAD_SIZE + sizeof(struct task_struct). By comparison + * a struct pid is about 64 bytes. + * + * Holding a reference to struct pid solves both of these problems. + * It is small so holding a reference does not consume a lot of + * resources, and since a new struct pid is allocated when the numeric + * pid value is reused we don't mistakenly refer to new processes. 
+ */ + struct pid { + atomic_t count; /* Try to keep pid_chain in the same cacheline as nr for find_pid */ int nr; struct hlist_node pid_chain; - /* list of pids with the same nr, only one of them is in the hash */ - struct list_head pid_list; + /* lists of tasks that use this pid */ + struct hlist_head tasks[PIDTYPE_MAX]; + struct rcu_head rcu; }; -#define pid_task(elem, type) \ - list_entry(elem, struct task_struct, pids[type].pid_list) +struct pid_link +{ + struct hlist_node node; + struct pid *pid; +}; + +static inline struct pid *get_pid(struct pid *pid) +{ + if (pid) + atomic_inc(&pid->count); + return pid; +} + +extern void FASTCALL(put_pid(struct pid *pid)); +extern struct task_struct *FASTCALL(pid_task(struct pid *pid, enum pid_type)); +extern struct task_struct *FASTCALL(get_pid_task(struct pid *pid, + enum pid_type)); /* * attach_pid() and detach_pid() must be called with the tasklist_lock * write-held. */ -extern int FASTCALL(attach_pid(struct task_struct *task, enum pid_type type, int nr)); +extern int FASTCALL(attach_pid(struct task_struct *task, + enum pid_type type, int nr)); extern void FASTCALL(detach_pid(struct task_struct *task, enum pid_type)); /* * look up a PID in the hash table. Must be called with the tasklist_lock - * held. + * or rcu_read_lock() held. + */ +extern struct pid *FASTCALL(find_pid(int nr)); + +/* + * Lookup a PID in the hash table, and return with it's count elevated. */ -extern struct pid *FASTCALL(find_pid(enum pid_type, int)); +extern struct pid *find_get_pid(int nr); -extern int alloc_pidmap(void); -extern void FASTCALL(free_pidmap(int)); +extern struct pid *alloc_pid(void); +extern void FASTCALL(free_pid(struct pid *pid)); +#define pid_next(task, type) \ + ((task)->pids[(type)].node.next) + +#define pid_next_task(task, type) \ + hlist_entry(pid_next(task, type), struct task_struct, \ + pids[(type)].node) + + +/* We could use hlist_for_each_entry_rcu here but it takes more arguments + * than the do_each_task_pid/while_each_task_pid. So we roll our own + * to preserve the existing interface. + */ #define do_each_task_pid(who, type, task) \ if ((task = find_task_by_pid_type(type, who))) { \ - prefetch((task)->pids[type].pid_list.next); \ + prefetch(pid_next(task, type)); \ do { #define while_each_task_pid(who, type, task) \ - } while (task = pid_task((task)->pids[type].pid_list.next,\ - type), \ - prefetch((task)->pids[type].pid_list.next), \ - hlist_unhashed(&(task)->pids[type].pid_chain)); \ - } \ + } while (pid_next(task, type) && ({ \ + task = pid_next_task(task, type); \ + rcu_dereference(task); \ + prefetch(pid_next(task, type)); \ + 1; }) ); \ + } #endif /* _LINUX_PID_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 7e0ff5dba986..541f4828f5e7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -760,7 +760,7 @@ struct task_struct { struct task_struct *group_leader; /* threadgroup leader */ /* PID/PID hash table linkage. 
*/ - struct pid pids[PIDTYPE_MAX]; + struct pid_link pids[PIDTYPE_MAX]; struct list_head thread_group; struct completion *vfork_done; /* for vfork() */ @@ -899,7 +899,7 @@ static inline pid_t process_group(struct task_struct *tsk) */ static inline int pid_alive(struct task_struct *p) { - return p->pids[PIDTYPE_PID].nr != 0; + return p->pids[PIDTYPE_PID].pid != NULL; } extern void free_task(struct task_struct *tsk); diff --git a/kernel/fork.c b/kernel/fork.c index b1341205be27..03975d0467f9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1315,17 +1315,19 @@ long do_fork(unsigned long clone_flags, { struct task_struct *p; int trace = 0; - long pid = alloc_pidmap(); + struct pid *pid = alloc_pid(); + long nr; - if (pid < 0) + if (!pid) return -EAGAIN; + nr = pid->nr; if (unlikely(current->ptrace)) { trace = fork_traceflag (clone_flags); if (trace) clone_flags |= CLONE_PTRACE; } - p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid); + p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, nr); /* * Do this prior waking up the new thread - the thread pointer * might get invalid after that point, if the thread exits quickly. @@ -1352,7 +1354,7 @@ long do_fork(unsigned long clone_flags, p->state = TASK_STOPPED; if (unlikely (trace)) { - current->ptrace_message = pid; + current->ptrace_message = nr; ptrace_notify ((trace << 8) | SIGTRAP); } @@ -1362,10 +1364,10 @@ long do_fork(unsigned long clone_flags, ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP); } } else { - free_pidmap(pid); - pid = PTR_ERR(p); + free_pid(pid); + nr = PTR_ERR(p); } - return pid; + return nr; } #ifndef ARCH_MIN_MMSTRUCT_ALIGN diff --git a/kernel/pid.c b/kernel/pid.c index a9f2dfd006d2..eeb836b65ca4 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -28,8 +28,9 @@ #include #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift) -static struct hlist_head *pid_hash[PIDTYPE_MAX]; +static struct hlist_head *pid_hash; static int pidhash_shift; +static kmem_cache_t *pid_cachep; int pid_max = PID_MAX_DEFAULT; int last_pid; @@ -60,9 +61,22 @@ typedef struct pidmap { static pidmap_t pidmap_array[PIDMAP_ENTRIES] = { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } }; +/* + * Note: disable interrupts while the pidmap_lock is held as an + * interrupt might come in and do read_lock(&tasklist_lock). + * + * If we don't disable interrupts there is a nasty deadlock between + * detach_pid()->free_pid() and another cpu that does + * spin_lock(&pidmap_lock) followed by an interrupt routine that does + * read_lock(&tasklist_lock); + * + * After we clean up the tasklist_lock and know there are no + * irq handlers that take it we can leave the interrupts enabled. + * For now it is easier to be safe than to prove it can't happen. 
+ */ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock); -fastcall void free_pidmap(int pid) +static fastcall void free_pidmap(int pid) { pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE; int offset = pid & BITS_PER_PAGE_MASK; @@ -71,7 +85,7 @@ fastcall void free_pidmap(int pid) atomic_inc(&map->nr_free); } -int alloc_pidmap(void) +static int alloc_pidmap(void) { int i, offset, max_scan, pid, last = last_pid; pidmap_t *map; @@ -89,12 +103,12 @@ int alloc_pidmap(void) * Free the page if someone raced with us * installing it: */ - spin_lock(&pidmap_lock); + spin_lock_irq(&pidmap_lock); if (map->page) free_page(page); else map->page = (void *)page; - spin_unlock(&pidmap_lock); + spin_unlock_irq(&pidmap_lock); if (unlikely(!map->page)) break; } @@ -131,13 +145,73 @@ int alloc_pidmap(void) return -1; } -struct pid * fastcall find_pid(enum pid_type type, int nr) +fastcall void put_pid(struct pid *pid) +{ + if (!pid) + return; + if ((atomic_read(&pid->count) == 1) || + atomic_dec_and_test(&pid->count)) + kmem_cache_free(pid_cachep, pid); +} + +static void delayed_put_pid(struct rcu_head *rhp) +{ + struct pid *pid = container_of(rhp, struct pid, rcu); + put_pid(pid); +} + +fastcall void free_pid(struct pid *pid) +{ + /* We can be called with write_lock_irq(&tasklist_lock) held */ + unsigned long flags; + + spin_lock_irqsave(&pidmap_lock, flags); + hlist_del_rcu(&pid->pid_chain); + spin_unlock_irqrestore(&pidmap_lock, flags); + + free_pidmap(pid->nr); + call_rcu(&pid->rcu, delayed_put_pid); +} + +struct pid *alloc_pid(void) +{ + struct pid *pid; + enum pid_type type; + int nr = -1; + + pid = kmem_cache_alloc(pid_cachep, GFP_KERNEL); + if (!pid) + goto out; + + nr = alloc_pidmap(); + if (nr < 0) + goto out_free; + + atomic_set(&pid->count, 1); + pid->nr = nr; + for (type = 0; type < PIDTYPE_MAX; ++type) + INIT_HLIST_HEAD(&pid->tasks[type]); + + spin_lock_irq(&pidmap_lock); + hlist_add_head_rcu(&pid->pid_chain, &pid_hash[pid_hashfn(pid->nr)]); + spin_unlock_irq(&pidmap_lock); + +out: + return pid; + +out_free: + kmem_cache_free(pid_cachep, pid); + pid = NULL; + goto out; +} + +struct pid * fastcall find_pid(int nr) { struct hlist_node *elem; struct pid *pid; hlist_for_each_entry_rcu(pid, elem, - &pid_hash[type][pid_hashfn(nr)], pid_chain) { + &pid_hash[pid_hashfn(nr)], pid_chain) { if (pid->nr == nr) return pid; } @@ -146,77 +220,82 @@ struct pid * fastcall find_pid(enum pid_type type, int nr) int fastcall attach_pid(task_t *task, enum pid_type type, int nr) { - struct pid *pid, *task_pid; - - task_pid = &task->pids[type]; - pid = find_pid(type, nr); - task_pid->nr = nr; - if (pid == NULL) { - INIT_LIST_HEAD(&task_pid->pid_list); - hlist_add_head_rcu(&task_pid->pid_chain, - &pid_hash[type][pid_hashfn(nr)]); - } else { - INIT_HLIST_NODE(&task_pid->pid_chain); - list_add_tail_rcu(&task_pid->pid_list, &pid->pid_list); - } + struct pid_link *link; + struct pid *pid; + + WARN_ON(!task->pid); /* to be removed soon */ + WARN_ON(!nr); /* to be removed soon */ + + link = &task->pids[type]; + link->pid = pid = find_pid(nr); + hlist_add_head_rcu(&link->node, &pid->tasks[type]); return 0; } -static fastcall int __detach_pid(task_t *task, enum pid_type type) +void fastcall detach_pid(task_t *task, enum pid_type type) { - struct pid *pid, *pid_next; - int nr = 0; + struct pid_link *link; + struct pid *pid; + int tmp; - pid = &task->pids[type]; - if (!hlist_unhashed(&pid->pid_chain)) { + link = &task->pids[type]; + pid = link->pid; - if (list_empty(&pid->pid_list)) { - nr = pid->nr; - 
hlist_del_rcu(&pid->pid_chain); - } else { - pid_next = list_entry(pid->pid_list.next, - struct pid, pid_list); - /* insert next pid from pid_list to hash */ - hlist_replace_rcu(&pid->pid_chain, - &pid_next->pid_chain); - } - } + hlist_del_rcu(&link->node); + link->pid = NULL; - list_del_rcu(&pid->pid_list); - pid->nr = 0; + for (tmp = PIDTYPE_MAX; --tmp >= 0; ) + if (!hlist_empty(&pid->tasks[tmp])) + return; - return nr; + free_pid(pid); } -void fastcall detach_pid(task_t *task, enum pid_type type) +struct task_struct * fastcall pid_task(struct pid *pid, enum pid_type type) { - int tmp, nr; + struct task_struct *result = NULL; + if (pid) { + struct hlist_node *first; + first = rcu_dereference(pid->tasks[type].first); + if (first) + result = hlist_entry(first, struct task_struct, pids[(type)].node); + } + return result; +} - nr = __detach_pid(task, type); - if (!nr) - return; +/* + * Must be called under rcu_read_lock() or with tasklist_lock read-held. + */ +task_t *find_task_by_pid_type(int type, int nr) +{ + return pid_task(find_pid(nr), type); +} - for (tmp = PIDTYPE_MAX; --tmp >= 0; ) - if (tmp != type && find_pid(tmp, nr)) - return; +EXPORT_SYMBOL(find_task_by_pid_type); - free_pidmap(nr); +struct task_struct *fastcall get_pid_task(struct pid *pid, enum pid_type type) +{ + struct task_struct *result; + rcu_read_lock(); + result = pid_task(pid, type); + if (result) + get_task_struct(result); + rcu_read_unlock(); + return result; } -task_t *find_task_by_pid_type(int type, int nr) +struct pid *find_get_pid(pid_t nr) { struct pid *pid; - pid = find_pid(type, nr); - if (!pid) - return NULL; + rcu_read_lock(); + pid = get_pid(find_pid(nr)); + rcu_read_unlock(); - return pid_task(&pid->pid_list, type); + return pid; } -EXPORT_SYMBOL(find_task_by_pid_type); - /* * The pid hash table is scaled according to the amount of memory in the * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or @@ -224,7 +303,7 @@ EXPORT_SYMBOL(find_task_by_pid_type); */ void __init pidhash_init(void) { - int i, j, pidhash_size; + int i, pidhash_size; unsigned long megabytes = nr_kernel_pages >> (20 - PAGE_SHIFT); pidhash_shift = max(4, fls(megabytes * 4)); @@ -233,16 +312,13 @@ void __init pidhash_init(void) printk("PID hash table entries: %d (order: %d, %Zd bytes)\n", pidhash_size, pidhash_shift, - PIDTYPE_MAX * pidhash_size * sizeof(struct hlist_head)); - - for (i = 0; i < PIDTYPE_MAX; i++) { - pid_hash[i] = alloc_bootmem(pidhash_size * - sizeof(*(pid_hash[i]))); - if (!pid_hash[i]) - panic("Could not alloc pidhash!\n"); - for (j = 0; j < pidhash_size; j++) - INIT_HLIST_HEAD(&pid_hash[i][j]); - } + pidhash_size * sizeof(struct hlist_head)); + + pid_hash = alloc_bootmem(pidhash_size * sizeof(*(pid_hash))); + if (!pid_hash) + panic("Could not alloc pidhash!\n"); + for (i = 0; i < pidhash_size; i++) + INIT_HLIST_HEAD(&pid_hash[i]); } void __init pidmap_init(void) @@ -251,4 +327,8 @@ void __init pidmap_init(void) /* Reserve PID 0. We never call free_pidmap(0) */ set_bit(0, pidmap_array->page); atomic_dec(&pidmap_array->nr_free); + + pid_cachep = kmem_cache_create("pid", sizeof(struct pid), + __alignof__(struct pid), + SLAB_PANIC, NULL, NULL); } -- cgit v1.2.3 From 3e7e241f8c5c87cc3685364feface081c9fa3648 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 31 Mar 2006 02:31:43 -0800 Subject: [PATCH] dcache: Add helper d_hash_and_lookup It is very common to hash a dentry and then to call lookup. If we take fs specific hash functions into account the full hash logic can get ugly. 
Further full_name_hash as an inline function is almost 100 bytes on x86 so having a non-inline choice in some cases can measurably decrease code size. Signed-off-by: Eric W. Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dcache.c | 44 ++++++++++++++++++++++++++++---------------- include/linux/dcache.h | 1 + 2 files changed, 29 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/fs/dcache.c b/fs/dcache.c index 21dffeec755b..940d188e5d14 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1100,6 +1100,32 @@ next: return found; } +/** + * d_hash_and_lookup - hash the qstr then search for a dentry + * @dir: Directory to search in + * @name: qstr of name we wish to find + * + * On hash failure or on lookup failure NULL is returned. + */ +struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) +{ + struct dentry *dentry = NULL; + + /* + * Check for a fs-specific hash function. Note that we must + * calculate the standard hash first, as the d_op->d_hash() + * routine may choose to leave the hash value unchanged. + */ + name->hash = full_name_hash(name->name, name->len); + if (dir->d_op && dir->d_op->d_hash) { + if (dir->d_op->d_hash(dir, name) < 0) + goto out; + } + dentry = d_lookup(dir, name); +out: + return dentry; +} + /** * d_validate - verify dentry provided from insecure source * @dentry: The dentry alleged to be valid child of @dparent @@ -1616,26 +1642,12 @@ ino_t find_inode_number(struct dentry *dir, struct qstr *name) struct dentry * dentry; ino_t ino = 0; - /* - * Check for a fs-specific hash function. Note that we must - * calculate the standard hash first, as the d_op->d_hash() - * routine may choose to leave the hash value unchanged. - */ - name->hash = full_name_hash(name->name, name->len); - if (dir->d_op && dir->d_op->d_hash) - { - if (dir->d_op->d_hash(dir, name) != 0) - goto out; - } - - dentry = d_lookup(dir, name); - if (dentry) - { + dentry = d_hash_and_lookup(dir, name); + if (dentry) { if (dentry->d_inode) ino = dentry->d_inode->i_ino; dput(dentry); } -out: return ino; } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index d10bd30c337e..836325ee0931 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -275,6 +275,7 @@ extern void d_move(struct dentry *, struct dentry *); /* appendix may either be NULL or be used for transname suffixes */ extern struct dentry * d_lookup(struct dentry *, struct qstr *); extern struct dentry * __d_lookup(struct dentry *, struct qstr *); +extern struct dentry * d_hash_and_lookup(struct dentry *, struct qstr *); /* validate "insecure" dentry pointer */ extern int d_validate(struct dentry *, struct dentry *); -- cgit v1.2.3 From 6ca017658b1f902c9bba2cc1017e301581f7728d Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Fri, 31 Mar 2006 02:31:49 -0800 Subject: [PATCH] backlight: Backlight Class Improvements Backlight class attributes are currently easy to implement incorrectly. Moving certain handling into the backlight core prevents this whilst at the same time makes the drivers simpler and consistent. The following changes are included: The brightness attribute only sets and reads the brightness variable in the backlight_properties structure. The power attribute only sets and reads the power variable in the backlight_properties structure. Any framebuffer blanking events change a variable fb_blank in the backlight_properties structure. The backlight driver has only two functions to implement. 
One function is called when any of the above properties change (to update the backlight brightness), the second is called to return the current backlight brightness value. A new attribute "actual_brightness" is added to return this brightness as determined by the driver having combined all the above factors (and any driver/device specific factors). Additionally, the backlight core takes care of checking the maximum brightness is not exceeded and of turning off the backlight before device removal. The corgi backlight driver is updated to reflect these changes. Signed-off-by: Richard Purdie Signed-off-by: Antonino Daplas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/backlight/backlight.c | 84 ++++++++++++++++++++----------- drivers/video/backlight/corgi_bl.c | 99 ++++++++++++++----------------------- include/linux/backlight.h | 25 ++++++---- 3 files changed, 107 insertions(+), 101 deletions(-) (limited to 'include') diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c index 151fda8dded0..334b1db1bd7c 100644 --- a/drivers/video/backlight/backlight.c +++ b/drivers/video/backlight/backlight.c @@ -16,14 +16,12 @@ static ssize_t backlight_show_power(struct class_device *cdev, char *buf) { - int rc; + int rc = -ENXIO; struct backlight_device *bd = to_backlight_device(cdev); down(&bd->sem); - if (likely(bd->props && bd->props->get_power)) - rc = sprintf(buf, "%d\n", bd->props->get_power(bd)); - else - rc = -ENXIO; + if (likely(bd->props)) + rc = sprintf(buf, "%d\n", bd->props->power); up(&bd->sem); return rc; @@ -31,7 +29,7 @@ static ssize_t backlight_show_power(struct class_device *cdev, char *buf) static ssize_t backlight_store_power(struct class_device *cdev, const char *buf, size_t count) { - int rc, power; + int rc = -ENXIO, power; char *endp; struct backlight_device *bd = to_backlight_device(cdev); @@ -40,12 +38,13 @@ static ssize_t backlight_store_power(struct class_device *cdev, const char *buf, return -EINVAL; down(&bd->sem); - if (likely(bd->props && bd->props->set_power)) { + if (likely(bd->props)) { pr_debug("backlight: set power to %d\n", power); - bd->props->set_power(bd, power); + bd->props->power = power; + if (likely(bd->props->update_status)) + bd->props->update_status(bd); rc = count; - } else - rc = -ENXIO; + } up(&bd->sem); return rc; @@ -53,14 +52,12 @@ static ssize_t backlight_store_power(struct class_device *cdev, const char *buf, static ssize_t backlight_show_brightness(struct class_device *cdev, char *buf) { - int rc; + int rc = -ENXIO; struct backlight_device *bd = to_backlight_device(cdev); down(&bd->sem); - if (likely(bd->props && bd->props->get_brightness)) - rc = sprintf(buf, "%d\n", bd->props->get_brightness(bd)); - else - rc = -ENXIO; + if (likely(bd->props)) + rc = sprintf(buf, "%d\n", bd->props->brightness); up(&bd->sem); return rc; @@ -68,7 +65,7 @@ static ssize_t backlight_show_brightness(struct class_device *cdev, char *buf) static ssize_t backlight_store_brightness(struct class_device *cdev, const char *buf, size_t count) { - int rc, brightness; + int rc = -ENXIO, brightness; char *endp; struct backlight_device *bd = to_backlight_device(cdev); @@ -77,12 +74,18 @@ static ssize_t backlight_store_brightness(struct class_device *cdev, const char return -EINVAL; down(&bd->sem); - if (likely(bd->props && bd->props->set_brightness)) { - pr_debug("backlight: set brightness to %d\n", brightness); - bd->props->set_brightness(bd, brightness); - rc = count; - } else - rc = -ENXIO; + if (likely(bd->props)) { 
+ if (brightness > bd->props->max_brightness) + rc = -EINVAL; + else { + pr_debug("backlight: set brightness to %d\n", + brightness); + bd->props->brightness = brightness; + if (likely(bd->props->update_status)) + bd->props->update_status(bd); + rc = count; + } + } up(&bd->sem); return rc; @@ -90,14 +93,26 @@ static ssize_t backlight_store_brightness(struct class_device *cdev, const char static ssize_t backlight_show_max_brightness(struct class_device *cdev, char *buf) { - int rc; + int rc = -ENXIO; struct backlight_device *bd = to_backlight_device(cdev); down(&bd->sem); if (likely(bd->props)) rc = sprintf(buf, "%d\n", bd->props->max_brightness); - else - rc = -ENXIO; + up(&bd->sem); + + return rc; +} + +static ssize_t backlight_show_actual_brightness(struct class_device *cdev, + char *buf) +{ + int rc = -ENXIO; + struct backlight_device *bd = to_backlight_device(cdev); + + down(&bd->sem); + if (likely(bd->props && bd->props->get_brightness)) + rc = sprintf(buf, "%d\n", bd->props->get_brightness(bd)); up(&bd->sem); return rc; @@ -123,7 +138,10 @@ static struct class backlight_class = { static struct class_device_attribute bl_class_device_attributes[] = { DECLARE_ATTR(power, 0644, backlight_show_power, backlight_store_power), - DECLARE_ATTR(brightness, 0644, backlight_show_brightness, backlight_store_brightness), + DECLARE_ATTR(brightness, 0644, backlight_show_brightness, + backlight_store_brightness), + DECLARE_ATTR(actual_brightness, 0444, backlight_show_actual_brightness, + NULL), DECLARE_ATTR(max_brightness, 0444, backlight_show_max_brightness, NULL), }; @@ -144,8 +162,12 @@ static int fb_notifier_callback(struct notifier_block *self, bd = container_of(self, struct backlight_device, fb_notif); down(&bd->sem); if (bd->props) - if (!bd->props->check_fb || bd->props->check_fb(evdata->info)) - bd->props->set_power(bd, *(int *)evdata->data); + if (!bd->props->check_fb || + bd->props->check_fb(evdata->info)) { + bd->props->fb_blank = *(int *)evdata->data; + if (likely(bd->props && bd->props->update_status)) + bd->props->update_status(bd); + } up(&bd->sem); return 0; } @@ -231,6 +253,12 @@ void backlight_device_unregister(struct backlight_device *bd) &bl_class_device_attributes[i]); down(&bd->sem); + if (likely(bd->props && bd->props->update_status)) { + bd->props->brightness = 0; + bd->props->power = 0; + bd->props->update_status(bd); + } + bd->props = NULL; up(&bd->sem); diff --git a/drivers/video/backlight/corgi_bl.c b/drivers/video/backlight/corgi_bl.c index d0aaf450e8c7..f86213b4a8fb 100644 --- a/drivers/video/backlight/corgi_bl.c +++ b/drivers/video/backlight/corgi_bl.c @@ -25,24 +25,30 @@ #define CORGI_DEFAULT_INTENSITY 0x1f #define CORGI_LIMIT_MASK 0x0b -static int corgibl_powermode = FB_BLANK_UNBLANK; -static int current_intensity = 0; -static int corgibl_limit = 0; +static int corgibl_intensity; static void (*corgibl_mach_set_intensity)(int intensity); static spinlock_t bl_lock = SPIN_LOCK_UNLOCKED; static struct backlight_properties corgibl_data; +static struct backlight_device *corgi_backlight_device; + +static unsigned long corgibl_flags; +#define CORGIBL_SUSPENDED 0x01 +#define CORGIBL_BATTLOW 0x02 -static void corgibl_send_intensity(int intensity) +static int corgibl_send_intensity(struct backlight_device *bd) { unsigned long flags; void (*corgi_kick_batt)(void); + int intensity = bd->props->brightness; - if (corgibl_powermode != FB_BLANK_UNBLANK) { + if (bd->props->power != FB_BLANK_UNBLANK) + intensity = 0; + if (bd->props->fb_blank != FB_BLANK_UNBLANK) intensity = 0; - } else 
{ - if (corgibl_limit) - intensity &= CORGI_LIMIT_MASK; - } + if (corgibl_flags & CORGIBL_SUSPENDED) + intensity = 0; + if (corgibl_flags & CORGIBL_BATTLOW) + intensity &= CORGI_LIMIT_MASK; spin_lock_irqsave(&bl_lock, flags); @@ -50,45 +56,29 @@ static void corgibl_send_intensity(int intensity) spin_unlock_irqrestore(&bl_lock, flags); + corgibl_intensity = intensity; + corgi_kick_batt = symbol_get(sharpsl_battery_kick); if (corgi_kick_batt) { corgi_kick_batt(); symbol_put(sharpsl_battery_kick); } -} -static void corgibl_blank(int blank) -{ - switch(blank) { - - case FB_BLANK_NORMAL: - case FB_BLANK_VSYNC_SUSPEND: - case FB_BLANK_HSYNC_SUSPEND: - case FB_BLANK_POWERDOWN: - if (corgibl_powermode == FB_BLANK_UNBLANK) { - corgibl_send_intensity(0); - corgibl_powermode = blank; - } - break; - case FB_BLANK_UNBLANK: - if (corgibl_powermode != FB_BLANK_UNBLANK) { - corgibl_powermode = blank; - corgibl_send_intensity(current_intensity); - } - break; - } + return 0; } #ifdef CONFIG_PM static int corgibl_suspend(struct platform_device *dev, pm_message_t state) { - corgibl_blank(FB_BLANK_POWERDOWN); + corgibl_flags |= CORGIBL_SUSPENDED; + corgibl_send_intensity(corgi_backlight_device); return 0; } static int corgibl_resume(struct platform_device *dev) { - corgibl_blank(FB_BLANK_UNBLANK); + corgibl_flags &= ~CORGIBL_SUSPENDED; + corgibl_send_intensity(corgi_backlight_device); return 0; } #else @@ -96,54 +86,38 @@ static int corgibl_resume(struct platform_device *dev) #define corgibl_resume NULL #endif - -static int corgibl_set_power(struct backlight_device *bd, int state) -{ - corgibl_blank(state); - return 0; -} - -static int corgibl_get_power(struct backlight_device *bd) +static int corgibl_get_intensity(struct backlight_device *bd) { - return corgibl_powermode; + return corgibl_intensity; } -static int corgibl_set_intensity(struct backlight_device *bd, int intensity) +static int corgibl_set_intensity(struct backlight_device *bd) { - if (intensity > corgibl_data.max_brightness) - intensity = corgibl_data.max_brightness; - corgibl_send_intensity(intensity); - current_intensity=intensity; + corgibl_send_intensity(corgi_backlight_device); return 0; } -static int corgibl_get_intensity(struct backlight_device *bd) -{ - return current_intensity; -} - /* * Called when the battery is low to limit the backlight intensity. * If limit==0 clear any limit, otherwise limit the intensity */ void corgibl_limit_intensity(int limit) { - corgibl_limit = (limit ? 
1 : 0); - corgibl_send_intensity(current_intensity); + if (limit) + corgibl_flags |= CORGIBL_BATTLOW; + else + corgibl_flags &= ~CORGIBL_BATTLOW; + corgibl_send_intensity(corgi_backlight_device); } EXPORT_SYMBOL(corgibl_limit_intensity); static struct backlight_properties corgibl_data = { - .owner = THIS_MODULE, - .get_power = corgibl_get_power, - .set_power = corgibl_set_power, + .owner = THIS_MODULE, .get_brightness = corgibl_get_intensity, - .set_brightness = corgibl_set_intensity, + .update_status = corgibl_set_intensity, }; -static struct backlight_device *corgi_backlight_device; - static int __init corgibl_probe(struct platform_device *pdev) { struct corgibl_machinfo *machinfo = pdev->dev.platform_data; @@ -156,8 +130,9 @@ static int __init corgibl_probe(struct platform_device *pdev) if (IS_ERR (corgi_backlight_device)) return PTR_ERR (corgi_backlight_device); - corgibl_set_intensity(NULL, CORGI_DEFAULT_INTENSITY); - corgibl_limit_intensity(0); + corgibl_data.power = FB_BLANK_UNBLANK; + corgibl_data.brightness = CORGI_DEFAULT_INTENSITY; + corgibl_send_intensity(corgi_backlight_device); printk("Corgi Backlight Driver Initialized.\n"); return 0; @@ -167,8 +142,6 @@ static int corgibl_remove(struct platform_device *dev) { backlight_device_unregister(corgi_backlight_device); - corgibl_set_intensity(NULL, 0); - printk("Corgi Backlight Driver Unloaded\n"); return 0; } diff --git a/include/linux/backlight.h b/include/linux/backlight.h index bb9e54322322..75e91f5b6a04 100644 --- a/include/linux/backlight.h +++ b/include/linux/backlight.h @@ -19,20 +19,25 @@ struct fb_info; struct backlight_properties { /* Owner module */ struct module *owner; - /* Get the backlight power status (0: full on, 1..3: power saving - modes; 4: full off), see FB_BLANK_XXX */ - int (*get_power)(struct backlight_device *); - /* Enable or disable power to the LCD (0: on; 4: off, see FB_BLANK_XXX) */ - int (*set_power)(struct backlight_device *, int power); - /* Maximal value for brightness (read-only) */ - int max_brightness; - /* Get current backlight brightness */ + + /* Notify the backlight driver some property has changed */ + int (*update_status)(struct backlight_device *); + /* Return the current backlight brightness (accounting for power, + fb_blank etc.) */ int (*get_brightness)(struct backlight_device *); - /* Set backlight brightness (0..max_brightness) */ - int (*set_brightness)(struct backlight_device *, int brightness); /* Check if given framebuffer device is the one bound to this backlight; return 0 if not, !=0 if it is. If NULL, backlight always matches the fb. */ int (*check_fb)(struct fb_info *); + + /* Current User requested brightness (0 - max_brightness) */ + int brightness; + /* Maximal value for brightness (read-only) */ + int max_brightness; + /* Current FB Power mode (0: full on, 1..3: power saving + modes; 4: full off), see FB_BLANK_XXX */ + int power; + /* FB Blanking active? (values as for power) */ + int fb_blank; }; struct backlight_device { -- cgit v1.2.3 From 2c0f5fb08e8ad59f396b1bda41ccd93cbb00a09f Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Fri, 31 Mar 2006 02:31:51 -0800 Subject: [PATCH] backlight: corgi_bl: Generalise to support other Sharp SL hardware Generalise the Corgi backlight driver by moving the default intensity and limit mask settings into the platform specific data structure. This enables the driver to support other Zaurus hardware, specifically the SL-6000x (Tosa) model. Also change the spinlock to a mutex (the spinlock is overkill). 
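
As an illustration of what the generalisation buys, a board file for another Sharp SL machine now only has to fill in a corgibl_machinfo and hand it to the driver as platform data. The sketch below is hypothetical: the function body, the numeric values, the header path and the "corgi-bl" device name are assumptions modelled on the Corgi and Spitz board files in the diff that follows, not code from this patch.

#include <linux/platform_device.h>
#include <asm/arch/sharpsl.h>

/* Placeholder: write 'intensity' to this board's brightness control. */
static void example_bl_set_intensity(int intensity)
{
}

static struct corgibl_machinfo example_bl_machinfo = {
	.max_intensity		= 0x2f,	/* hardware maximum (assumed) */
	.default_intensity	= 0x1f,	/* brightness programmed at probe time */
	.limit_mask		= 0x0b,	/* applied while the battery is low */
	.set_bl_intensity	= example_bl_set_intensity,
};

static struct platform_device example_bl_device = {
	.name	= "corgi-bl",
	.id	= -1,
	.dev	= {
		.platform_data	= &example_bl_machinfo,
	},
};

Of the new fields only limit_mask has a fallback: the probe code in the diff sets it to -1 (no limiting) when a board leaves it zero, so boards without a battery-low restriction can simply omit it.
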
Signed-off-by: Richard Purdie Signed-off-by: Antonino Daplas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mach-pxa/corgi.c | 2 ++ arch/arm/mach-pxa/spitz.c | 2 ++ drivers/video/backlight/Kconfig | 4 ++-- drivers/video/backlight/corgi_bl.c | 31 +++++++++++++------------------ include/asm-arm/arch-pxa/sharpsl.h | 2 ++ 5 files changed, 21 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c index 99604a544629..d6d726036361 100644 --- a/arch/arm/mach-pxa/corgi.c +++ b/arch/arm/mach-pxa/corgi.c @@ -141,6 +141,8 @@ struct corgissp_machinfo corgi_ssp_machinfo = { */ static struct corgibl_machinfo corgi_bl_machinfo = { .max_intensity = 0x2f, + .default_intensity = 0x1f, + .limit_mask = 0x0b, .set_bl_intensity = corgi_bl_set_intensity, }; diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c index ebe473053392..19b372df544a 100644 --- a/arch/arm/mach-pxa/spitz.c +++ b/arch/arm/mach-pxa/spitz.c @@ -220,6 +220,8 @@ struct corgissp_machinfo spitz_ssp_machinfo = { * Spitz Backlight Device */ static struct corgibl_machinfo spitz_bl_machinfo = { + .default_intensity = 0x1f, + .limit_mask = 0x0b, .max_intensity = 0x2f, }; diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig index 9d996f2c10d5..b895eaaa73fd 100644 --- a/drivers/video/backlight/Kconfig +++ b/drivers/video/backlight/Kconfig @@ -43,11 +43,11 @@ config LCD_DEVICE default y config BACKLIGHT_CORGI - tristate "Sharp Corgi Backlight Driver (SL-C7xx Series)" + tristate "Sharp Corgi Backlight Driver (SL Series)" depends on BACKLIGHT_DEVICE && PXA_SHARPSL default y help - If you have a Sharp Zaurus SL-C7xx, say y to enable the + If you have a Sharp Zaurus SL-C7xx, SL-Cxx00 or SL-6000x say y to enable the backlight driver. 
config BACKLIGHT_HP680 diff --git a/drivers/video/backlight/corgi_bl.c b/drivers/video/backlight/corgi_bl.c index f86213b4a8fb..2ebbfd95145f 100644 --- a/drivers/video/backlight/corgi_bl.c +++ b/drivers/video/backlight/corgi_bl.c @@ -1,7 +1,7 @@ /* - * Backlight Driver for Sharp Corgi + * Backlight Driver for Sharp Zaurus Handhelds (various models) * - * Copyright (c) 2004-2005 Richard Purdie + * Copyright (c) 2004-2006 Richard Purdie * * Based on Sharp's 2.4 Backlight Driver * @@ -15,21 +15,17 @@ #include #include #include -#include +#include #include #include - #include #include -#define CORGI_DEFAULT_INTENSITY 0x1f -#define CORGI_LIMIT_MASK 0x0b - static int corgibl_intensity; -static void (*corgibl_mach_set_intensity)(int intensity); -static spinlock_t bl_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_MUTEX(bl_mutex); static struct backlight_properties corgibl_data; static struct backlight_device *corgi_backlight_device; +static struct corgibl_machinfo *bl_machinfo; static unsigned long corgibl_flags; #define CORGIBL_SUSPENDED 0x01 @@ -37,7 +33,6 @@ static unsigned long corgibl_flags; static int corgibl_send_intensity(struct backlight_device *bd) { - unsigned long flags; void (*corgi_kick_batt)(void); int intensity = bd->props->brightness; @@ -48,13 +43,11 @@ static int corgibl_send_intensity(struct backlight_device *bd) if (corgibl_flags & CORGIBL_SUSPENDED) intensity = 0; if (corgibl_flags & CORGIBL_BATTLOW) - intensity &= CORGI_LIMIT_MASK; - - spin_lock_irqsave(&bl_lock, flags); - - corgibl_mach_set_intensity(intensity); + intensity &= bl_machinfo->limit_mask; - spin_unlock_irqrestore(&bl_lock, flags); + mutex_lock(&bl_mutex); + bl_machinfo->set_bl_intensity(intensity); + mutex_unlock(&bl_mutex); corgibl_intensity = intensity; @@ -122,8 +115,10 @@ static int __init corgibl_probe(struct platform_device *pdev) { struct corgibl_machinfo *machinfo = pdev->dev.platform_data; + bl_machinfo = machinfo; corgibl_data.max_brightness = machinfo->max_intensity; - corgibl_mach_set_intensity = machinfo->set_bl_intensity; + if (!machinfo->limit_mask) + machinfo->limit_mask = -1; corgi_backlight_device = backlight_device_register ("corgi-bl", NULL, &corgibl_data); @@ -131,7 +126,7 @@ static int __init corgibl_probe(struct platform_device *pdev) return PTR_ERR (corgi_backlight_device); corgibl_data.power = FB_BLANK_UNBLANK; - corgibl_data.brightness = CORGI_DEFAULT_INTENSITY; + corgibl_data.brightness = machinfo->default_intensity; corgibl_send_intensity(corgi_backlight_device); printk("Corgi Backlight Driver Initialized.\n"); diff --git a/include/asm-arm/arch-pxa/sharpsl.h b/include/asm-arm/arch-pxa/sharpsl.h index 0b43495d24b4..94cb4982af82 100644 --- a/include/asm-arm/arch-pxa/sharpsl.h +++ b/include/asm-arm/arch-pxa/sharpsl.h @@ -27,6 +27,8 @@ struct corgits_machinfo { */ struct corgibl_machinfo { int max_intensity; + int default_intensity; + int limit_mask; void (*set_bl_intensity)(int intensity); }; extern void corgibl_limit_intensity(int limit); -- cgit v1.2.3 From a536093a2f07007aa572e922752b7491b9ea8ff2 Mon Sep 17 00:00:00 2001 From: "Antonino A. Daplas" Date: Fri, 31 Mar 2006 02:31:54 -0800 Subject: [PATCH] fbcon: Fix big-endian bogosity in slow_imageblit() The monochrome->color expansion routine that handles bitmaps which have (widths % 8) != 0 (slow_imageblit) produces corrupt characters in big-endian. This is caused by a bogus bit test in slow_imageblit(). Fix. This patch may deserve to go to the stable tree. The code has already been well tested in little-endian machines. 
It's only in big-endian where there is uncertainty and Herbert confirmed that this is the correct way to go. It should not introduce regressions. Signed-off-by: Antonino Daplas Acked-by: Herbert Poetzl Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/cfbimgblt.c | 2 +- include/linux/fb.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/video/cfbimgblt.c b/drivers/video/cfbimgblt.c index 910e2338a27e..8ba6152db2fd 100644 --- a/drivers/video/cfbimgblt.c +++ b/drivers/video/cfbimgblt.c @@ -169,7 +169,7 @@ static inline void slow_imageblit(const struct fb_image *image, struct fb_info * while (j--) { l--; - color = (*s & 1 << (FB_BIT_NR(l))) ? fgcolor : bgcolor; + color = (*s & (1 << l)) ? fgcolor : bgcolor; val |= FB_SHIFT_HIGH(color, shift); /* Did the bitshift spill bits to the next long? */ diff --git a/include/linux/fb.h b/include/linux/fb.h index d03fadfcafe3..315d89740ddf 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -839,12 +839,10 @@ struct fb_info { #define FB_LEFT_POS(bpp) (32 - bpp) #define FB_SHIFT_HIGH(val, bits) ((val) >> (bits)) #define FB_SHIFT_LOW(val, bits) ((val) << (bits)) -#define FB_BIT_NR(b) (7 - (b)) #else #define FB_LEFT_POS(bpp) (0) #define FB_SHIFT_HIGH(val, bits) ((val) << (bits)) #define FB_SHIFT_LOW(val, bits) ((val) >> (bits)) -#define FB_BIT_NR(b) (b) #endif /* -- cgit v1.2.3 From a244e1698ae3609cdfe24088e1293593cb7a5278 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Fri, 31 Mar 2006 02:32:11 -0800 Subject: [PATCH] fs/namei.c: make lookup_hash() static As announced, lookup_hash() can now become static. Signed-off-by: Adrian Bunk Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/feature-removal-schedule.txt | 7 ------- fs/namei.c | 3 +-- include/linux/namei.h | 1 - 3 files changed, 1 insertion(+), 10 deletions(-) (limited to 'include') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index a92b10bb0b03..59d0c74c79c9 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -127,13 +127,6 @@ Who: Christoph Hellwig --------------------------- -What: EXPORT_SYMBOL(lookup_hash) -When: January 2006 -Why: Too low-level interface. Use lookup_one_len or lookup_create instead. -Who: Christoph Hellwig - ---------------------------- - What: CONFIG_FORCED_INLINING When: June 2006 Why: Config option is there to see if gcc is good enough. 
(in january diff --git a/fs/namei.c b/fs/namei.c index 22f6e8d16aa8..96723ae83c89 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1254,7 +1254,7 @@ out: return dentry; } -struct dentry * lookup_hash(struct nameidata *nd) +static struct dentry *lookup_hash(struct nameidata *nd) { return __lookup_hash(&nd->last, nd->dentry, nd); } @@ -2697,7 +2697,6 @@ EXPORT_SYMBOL(follow_up); EXPORT_SYMBOL(get_write_access); /* binfmt_aout */ EXPORT_SYMBOL(getname); EXPORT_SYMBOL(lock_rename); -EXPORT_SYMBOL(lookup_hash); EXPORT_SYMBOL(lookup_one_len); EXPORT_SYMBOL(page_follow_link_light); EXPORT_SYMBOL(page_put_link); diff --git a/include/linux/namei.h b/include/linux/namei.h index e6698013e4d0..58cb3d3d44b4 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -75,7 +75,6 @@ extern struct file *nameidata_to_filp(struct nameidata *nd, int flags); extern void release_open_intent(struct nameidata *); extern struct dentry * lookup_one_len(const char *, struct dentry *, int); -extern __deprecated_for_modules struct dentry * lookup_hash(struct nameidata *); extern int follow_down(struct vfsmount **, struct dentry **); extern int follow_up(struct vfsmount **, struct dentry **); -- cgit v1.2.3 From e358c1a2c45f7ec32d77cc09a2bb42d6823a4193 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 31 Mar 2006 02:32:13 -0800 Subject: [PATCH] mutex: some cleanups Turn some macros into inline functions and add proper type checking as well as being more readable. Also a minor comment adjustment. Signed-off-by: Nicolas Pitre Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/mutex-dec.h | 30 ++++++++++++++++-------------- include/asm-generic/mutex-xchg.h | 33 +++++++++++++++++---------------- 2 files changed, 33 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/asm-generic/mutex-dec.h b/include/asm-generic/mutex-dec.h index 40c6d1f86598..29c6ac34e236 100644 --- a/include/asm-generic/mutex-dec.h +++ b/include/asm-generic/mutex-dec.h @@ -17,13 +17,14 @@ * it wasn't 1 originally. This function MUST leave the value lower than * 1 even when the "1" assertion wasn't true. */ -#define __mutex_fastpath_lock(count, fail_fn) \ -do { \ - if (unlikely(atomic_dec_return(count) < 0)) \ - fail_fn(count); \ - else \ - smp_mb(); \ -} while (0) +static inline void +__mutex_fastpath_lock(atomic_t *count, fastcall void (*fail_fn)(atomic_t *)) +{ + if (unlikely(atomic_dec_return(count) < 0)) + fail_fn(count); + else + smp_mb(); +} /** * __mutex_fastpath_lock_retval - try to take the lock by moving the count @@ -36,7 +37,7 @@ do { \ * or anything the slow path function returns. */ static inline int -__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *)) +__mutex_fastpath_lock_retval(atomic_t *count, fastcall int (*fail_fn)(atomic_t *)) { if (unlikely(atomic_dec_return(count) < 0)) return fail_fn(count); @@ -59,12 +60,13 @@ __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *)) * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs * to return 0 otherwise. 
*/ -#define __mutex_fastpath_unlock(count, fail_fn) \ -do { \ - smp_mb(); \ - if (unlikely(atomic_inc_return(count) <= 0)) \ - fail_fn(count); \ -} while (0) +static inline void +__mutex_fastpath_unlock(atomic_t *count, fastcall void (*fail_fn)(atomic_t *)) +{ + smp_mb(); + if (unlikely(atomic_inc_return(count) <= 0)) + fail_fn(count); +} #define __mutex_slowpath_needs_to_unlock() 1 diff --git a/include/asm-generic/mutex-xchg.h b/include/asm-generic/mutex-xchg.h index 1d24f47e6c48..32a2100c1aeb 100644 --- a/include/asm-generic/mutex-xchg.h +++ b/include/asm-generic/mutex-xchg.h @@ -3,7 +3,7 @@ * * Generic implementation of the mutex fastpath, based on xchg(). * - * NOTE: An xchg based implementation is less optimal than an atomic + * NOTE: An xchg based implementation might be less optimal than an atomic * decrement/increment based implementation. If your architecture * has a reasonable atomic dec/inc then you should probably use * asm-generic/mutex-dec.h instead, or you could open-code an @@ -22,14 +22,14 @@ * wasn't 1 originally. This function MUST leave the value lower than 1 * even when the "1" assertion wasn't true. */ -#define __mutex_fastpath_lock(count, fail_fn) \ -do { \ - if (unlikely(atomic_xchg(count, 0) != 1)) \ - fail_fn(count); \ - else \ - smp_mb(); \ -} while (0) - +static inline void +__mutex_fastpath_lock(atomic_t *count, fastcall void (*fail_fn)(atomic_t *)) +{ + if (unlikely(atomic_xchg(count, 0) != 1)) + fail_fn(count); + else + smp_mb(); +} /** * __mutex_fastpath_lock_retval - try to take the lock by moving the count @@ -42,7 +42,7 @@ do { \ * or anything the slow path function returns */ static inline int -__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *)) +__mutex_fastpath_lock_retval(atomic_t *count, fastcall int (*fail_fn)(atomic_t *)) { if (unlikely(atomic_xchg(count, 0) != 1)) return fail_fn(count); @@ -64,12 +64,13 @@ __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *)) * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs * to return 0 otherwise. */ -#define __mutex_fastpath_unlock(count, fail_fn) \ -do { \ - smp_mb(); \ - if (unlikely(atomic_xchg(count, 1) != 0)) \ - fail_fn(count); \ -} while (0) +static inline void +__mutex_fastpath_unlock(atomic_t *count, fastcall void (*fail_fn)(atomic_t *)) +{ + smp_mb(); + if (unlikely(atomic_xchg(count, 1) != 0)) + fail_fn(count); +} #define __mutex_slowpath_needs_to_unlock() 0 -- cgit v1.2.3 From e1211e3fa7fd05ff0d4f597fd37e40de8acc6784 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 1 Apr 2006 01:38:18 +0900 Subject: [PATCH] libata: implement ata_dev_enabled and disabled() This patch renames ata_dev_present() to ata_dev_enabled() and adds ata_dev_disabled(). This is to discern the state where a device is present but disabled from not-present state. This disctinction is necessary when configuring transfer mode because device selection timing must not be violated even if a device fails to configure. 
Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/scsi/libata-core.c | 24 ++++++++++++------------ drivers/scsi/libata-scsi.c | 4 ++-- drivers/scsi/sata_mv.c | 2 +- drivers/scsi/sata_sil.c | 2 +- include/linux/libata.h | 16 +++++++++++++--- 5 files changed, 29 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 559abe4ea4e9..c10c550da38b 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -411,7 +411,7 @@ static const char *sata_spd_string(unsigned int spd) static void ata_dev_disable(struct ata_port *ap, struct ata_device *dev) { - if (ata_dev_present(dev)) { + if (ata_dev_enabled(dev)) { printk(KERN_WARNING "ata%u: dev %u disabled\n", ap->id, dev->devno); dev->class++; @@ -1222,7 +1222,7 @@ static int ata_dev_configure(struct ata_port *ap, struct ata_device *dev, unsigned int xfer_mask; int i, rc; - if (!ata_dev_present(dev)) { + if (!ata_dev_enabled(dev)) { DPRINTK("ENTER/EXIT (host %u, dev %u) -- nodev\n", ap->id, dev->devno); return 0; @@ -1401,7 +1401,7 @@ static int ata_bus_probe(struct ata_port *ap) dev->class = classes[i]; - if (!ata_dev_present(dev)) + if (!ata_dev_enabled(dev)) continue; WARN_ON(dev->id != NULL); @@ -1565,7 +1565,7 @@ void sata_phy_reset(struct ata_port *ap) struct ata_device *ata_dev_pair(struct ata_port *ap, struct ata_device *adev) { struct ata_device *pair = &ap->device[1 - adev->devno]; - if (!ata_dev_present(pair)) + if (!ata_dev_enabled(pair)) return NULL; return pair; } @@ -1778,7 +1778,7 @@ static int ata_host_set_pio(struct ata_port *ap) for (i = 0; i < ATA_MAX_DEVICES; i++) { struct ata_device *dev = &ap->device[i]; - if (!ata_dev_present(dev)) + if (!ata_dev_enabled(dev)) continue; if (!dev->pio_mode) { @@ -1802,7 +1802,7 @@ static void ata_host_set_dma(struct ata_port *ap) for (i = 0; i < ATA_MAX_DEVICES; i++) { struct ata_device *dev = &ap->device[i]; - if (!ata_dev_present(dev) || !dev->dma_mode) + if (!ata_dev_enabled(dev) || !dev->dma_mode) continue; dev->xfer_mode = dev->dma_mode; @@ -1830,7 +1830,7 @@ static void ata_set_mode(struct ata_port *ap) struct ata_device *dev = &ap->device[i]; unsigned int pio_mask, dma_mask; - if (!ata_dev_present(dev)) + if (!ata_dev_enabled(dev)) continue; ata_dev_xfermask(ap, dev); @@ -1858,7 +1858,7 @@ static void ata_set_mode(struct ata_port *ap) for (i = 0; i < ATA_MAX_DEVICES; i++) { struct ata_device *dev = &ap->device[i]; - if (!ata_dev_present(dev)) + if (!ata_dev_enabled(dev)) continue; rc = ata_dev_set_mode(ap, dev); @@ -2550,7 +2550,7 @@ int ata_dev_revalidate(struct ata_port *ap, struct ata_device *dev, u16 *id; int rc; - if (!ata_dev_present(dev)) + if (!ata_dev_enabled(dev)) return -ENODEV; class = dev->class; @@ -2679,7 +2679,7 @@ static void ata_dev_xfermask(struct ata_port *ap, struct ata_device *dev) /* FIXME: Use port-wide xfermask for now */ for (i = 0; i < ATA_MAX_DEVICES; i++) { struct ata_device *d = &ap->device[i]; - if (!ata_dev_present(d)) + if (!ata_dev_enabled(d)) continue; xfer_mask &= ata_pack_xfermask(d->pio_mask, d->mwdma_mask, d->udma_mask); @@ -4299,7 +4299,7 @@ int ata_device_resume(struct ata_port *ap, struct ata_device *dev) ap->flags &= ~ATA_FLAG_SUSPENDED; ata_set_mode(ap); } - if (!ata_dev_present(dev)) + if (!ata_dev_enabled(dev)) return 0; if (dev->class == ATA_DEV_ATA) ata_start_drive(ap, dev); @@ -4317,7 +4317,7 @@ int ata_device_resume(struct ata_port *ap, struct ata_device *dev) */ int ata_device_suspend(struct ata_port *ap, struct ata_device *dev, 
pm_message_t state) { - if (!ata_dev_present(dev)) + if (!ata_dev_enabled(dev)) return 0; if (dev->class == ATA_DEV_ATA) ata_flush_cache(ap, dev); diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c index 53f5b0d9161c..c1a4b29a9ae1 100644 --- a/drivers/scsi/libata-scsi.c +++ b/drivers/scsi/libata-scsi.c @@ -2349,7 +2349,7 @@ ata_scsi_find_dev(struct ata_port *ap, const struct scsi_device *scsidev) (scsidev->lun != 0))) return NULL; - if (unlikely(!ata_dev_present(dev))) + if (unlikely(!ata_dev_enabled(dev))) return NULL; if (!atapi_enabled || (ap->flags & ATA_FLAG_NO_ATAPI)) { @@ -2743,7 +2743,7 @@ void ata_scsi_scan_host(struct ata_port *ap) for (i = 0; i < ATA_MAX_DEVICES; i++) { dev = &ap->device[i]; - if (ata_dev_present(dev)) + if (ata_dev_enabled(dev)) scsi_scan_target(&ap->host->shost_gendev, 0, i, 0, 0); } } diff --git a/drivers/scsi/sata_mv.c b/drivers/scsi/sata_mv.c index fa901fd65085..0f7d334aadcc 100644 --- a/drivers/scsi/sata_mv.c +++ b/drivers/scsi/sata_mv.c @@ -1991,7 +1991,7 @@ comreset_retry: tf.nsect = readb((void __iomem *) ap->ioaddr.nsect_addr); dev->class = ata_dev_classify(&tf); - if (!ata_dev_present(dev)) { + if (!ata_dev_enabled(dev)) { VPRINTK("Port disabled post-sig: No device present.\n"); ata_port_disable(ap); } diff --git a/drivers/scsi/sata_sil.c b/drivers/scsi/sata_sil.c index 18c296c56899..d6c7086a5379 100644 --- a/drivers/scsi/sata_sil.c +++ b/drivers/scsi/sata_sil.c @@ -264,7 +264,7 @@ static void sil_post_set_mode (struct ata_port *ap) for (i = 0; i < 2; i++) { dev = &ap->device[i]; - if (!ata_dev_present(dev)) + if (!ata_dev_enabled(dev)) dev_mode[i] = 0; /* PIO0/1/2 */ else if (dev->flags & ATA_DFLAG_PIO) dev_mode[i] = 1; /* PIO3/4 */ diff --git a/include/linux/libata.h b/include/linux/libata.h index 0d61357604d5..c6883ba8cba9 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -672,14 +672,24 @@ static inline unsigned int ata_tag_valid(unsigned int tag) return (tag < ATA_MAX_QUEUE) ? 1 : 0; } -static inline unsigned int ata_class_present(unsigned int class) +static inline unsigned int ata_class_enabled(unsigned int class) { return class == ATA_DEV_ATA || class == ATA_DEV_ATAPI; } -static inline unsigned int ata_dev_present(const struct ata_device *dev) +static inline unsigned int ata_class_disabled(unsigned int class) { - return ata_class_present(dev->class); + return class == ATA_DEV_ATA_UNSUP || class == ATA_DEV_ATAPI_UNSUP; +} + +static inline unsigned int ata_dev_enabled(const struct ata_device *dev) +{ + return ata_class_enabled(dev->class); +} + +static inline unsigned int ata_dev_disabled(const struct ata_device *dev) +{ + return ata_class_disabled(dev->class); } static inline u8 ata_chk_status(struct ata_port *ap) -- cgit v1.2.3
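
To make the new three-way device state concrete, here is a small illustrative sketch of how a mode-setup loop can now tell the cases apart. It is not part of the patch: example_dev_absent() and example_set_modes() are invented names, and the real decisions live in ata_set_mode() and the ->post_set_mode() hooks touched above.

#include <linux/libata.h>

/* Neither enabled nor disabled: nothing was found in this device slot. */
static int example_dev_absent(const struct ata_device *dev)
{
	return !ata_dev_enabled(dev) && !ata_dev_disabled(dev);
}

static void example_set_modes(struct ata_port *ap)
{
	int i;

	for (i = 0; i < ATA_MAX_DEVICES; i++) {
		struct ata_device *dev = &ap->device[i];

		if (example_dev_absent(dev))
			continue;	/* empty slot: nothing to program */

		if (ata_dev_disabled(dev)) {
			/* Present but unusable: leave it at conservative
			 * timings so device selection on the shared cable
			 * stays within spec, and skip mode negotiation. */
			continue;
		}

		/* Enabled: safe to negotiate and program PIO/DMA modes. */
	}
}

The point of the split is visible in the middle branch: a disabled device still occupies its slot on the cable, so it has to be accounted for when timing device selection, but it is never handed a transfer mode the way an enabled device is.
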