From a9c667f8f0656631ee5438baaf21bf30d5f67375 Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner@redhat.com>
Date: Mon, 6 Jun 2011 09:51:52 -0400
Subject: ext4: fixed tracepoints cleanup

While creating fixed tracepoints for ext3, basically by porting them
from ext4, I found a lot of useless retyping, wrong type usage, useless
variable passing and other inconsistencies in the ext4 fixed tracepoint
code.

This patch cleans the fixed tracepoint code for ext4 and also simplify
some of them.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 include/trace/events/ext4.h | 179 +++++++++++++++++++-------------------------
 1 file changed, 76 insertions(+), 103 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index e09592d2f916..5ce2b2f5f524 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -26,7 +26,7 @@ TRACE_EVENT(ext4_free_inode,
 		__field(	umode_t, mode			)
 		__field(	uid_t,	uid			)
 		__field(	gid_t,	gid			)
-		__field(	blkcnt_t, blocks		)
+		__field(	__u64, blocks			)
 	),
 
 	TP_fast_assign(
@@ -40,9 +40,8 @@ TRACE_EVENT(ext4_free_inode,
 
 	TP_printk("dev %d,%d ino %lu mode 0%o uid %u gid %u blocks %llu",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  (unsigned long) __entry->ino,
-		  __entry->mode, __entry->uid, __entry->gid,
-		  (unsigned long long) __entry->blocks)
+		  (unsigned long) __entry->ino, __entry->mode,
+		  __entry->uid, __entry->gid, __entry->blocks)
 );
 
 TRACE_EVENT(ext4_request_inode,
@@ -178,7 +177,7 @@ TRACE_EVENT(ext4_begin_ordered_truncate,
 	TP_printk("dev %d,%d ino %lu new_size %lld",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
-		  (long long) __entry->new_size)
+		  __entry->new_size)
 );
 
 DECLARE_EVENT_CLASS(ext4__write_begin,
@@ -204,7 +203,7 @@ DECLARE_EVENT_CLASS(ext4__write_begin,
 		__entry->flags	= flags;
 	),
 
-	TP_printk("dev %d,%d ino %lu pos %llu len %u flags %u",
+	TP_printk("dev %d,%d ino %lu pos %lld len %u flags %u",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  __entry->pos, __entry->len, __entry->flags)
@@ -248,7 +247,7 @@ DECLARE_EVENT_CLASS(ext4__write_end,
 		__entry->copied	= copied;
 	),
 
-	TP_printk("dev %d,%d ino %lu pos %llu len %u copied %u",
+	TP_printk("dev %d,%d ino %lu pos %lld len %u copied %u",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  __entry->pos, __entry->len, __entry->copied)
@@ -286,29 +285,6 @@ DEFINE_EVENT(ext4__write_end, ext4_da_write_end,
 	TP_ARGS(inode, pos, len, copied)
 );
 
-TRACE_EVENT(ext4_writepage,
-	TP_PROTO(struct inode *inode, struct page *page),
-
-	TP_ARGS(inode, page),
-
-	TP_STRUCT__entry(
-		__field(	dev_t,	dev			)
-		__field(	ino_t,	ino			)
-		__field(	pgoff_t, index			)
-
-	),
-
-	TP_fast_assign(
-		__entry->dev	= inode->i_sb->s_dev;
-		__entry->ino	= inode->i_ino;
-		__entry->index	= page->index;
-	),
-
-	TP_printk("dev %d,%d ino %lu page_index %lu",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  (unsigned long) __entry->ino, __entry->index)
-);
-
 TRACE_EVENT(ext4_da_writepages,
 	TP_PROTO(struct inode *inode, struct writeback_control *wbc),
 
@@ -341,7 +317,7 @@ TRACE_EVENT(ext4_da_writepages,
 	),
 
 	TP_printk("dev %d,%d ino %lu nr_to_write %ld pages_skipped %ld "
-		  "range_start %llu range_end %llu sync_mode %d"
+		  "range_start %lld range_end %lld sync_mode %d"
 		  "for_kupdate %d range_cyclic %d writeback_index %lu",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino, __entry->nr_to_write,
@@ -449,7 +425,14 @@ DECLARE_EVENT_CLASS(ext4__page_op,
 	TP_printk("dev %d,%d ino %lu page_index %lu",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
-		  __entry->index)
+		  (unsigned long) __entry->index)
+);
+
+DEFINE_EVENT(ext4__page_op, ext4_writepage,
+
+	TP_PROTO(struct page *page),
+
+	TP_ARGS(page)
 );
 
 DEFINE_EVENT(ext4__page_op, ext4_readpage,
@@ -489,7 +472,7 @@ TRACE_EVENT(ext4_invalidatepage,
 	TP_printk("dev %d,%d ino %lu page_index %lu offset %lu",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
-		  __entry->index, __entry->offset)
+		  (unsigned long) __entry->index, __entry->offset)
 );
 
 TRACE_EVENT(ext4_discard_blocks,
@@ -562,12 +545,10 @@ DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_group_pa,
 );
 
 TRACE_EVENT(ext4_mb_release_inode_pa,
-	TP_PROTO(struct super_block *sb,
-		 struct inode *inode,
-		 struct ext4_prealloc_space *pa,
+	TP_PROTO(struct ext4_prealloc_space *pa,
 		 unsigned long long block, unsigned int count),
 
-	TP_ARGS(sb, inode, pa, block, count),
+	TP_ARGS(pa, block, count),
 
 	TP_STRUCT__entry(
 		__field(	dev_t,	dev			)
@@ -578,8 +559,8 @@ TRACE_EVENT(ext4_mb_release_inode_pa,
 	),
 
 	TP_fast_assign(
-		__entry->dev		= sb->s_dev;
-		__entry->ino		= inode->i_ino;
+		__entry->dev		= pa->pa_inode->i_sb->s_dev;
+		__entry->ino		= pa->pa_inode->i_ino;
 		__entry->block		= block;
 		__entry->count		= count;
 	),
@@ -591,10 +572,9 @@ TRACE_EVENT(ext4_mb_release_inode_pa,
 );
 
 TRACE_EVENT(ext4_mb_release_group_pa,
-	TP_PROTO(struct super_block *sb,
-		 struct ext4_prealloc_space *pa),
+	TP_PROTO(struct ext4_prealloc_space *pa),
 
-	TP_ARGS(sb, pa),
+	TP_ARGS(pa),
 
 	TP_STRUCT__entry(
 		__field(	dev_t,	dev			)
@@ -604,7 +584,7 @@ TRACE_EVENT(ext4_mb_release_group_pa,
 	),
 
 	TP_fast_assign(
-		__entry->dev		= sb->s_dev;
+		__entry->dev		= pa->pa_inode->i_sb->s_dev;
 		__entry->pa_pstart	= pa->pa_pstart;
 		__entry->pa_len		= pa->pa_len;
 	),
@@ -666,10 +646,10 @@ TRACE_EVENT(ext4_request_blocks,
 		__field(	ino_t,	ino			)
 		__field(	unsigned int, flags		)
 		__field(	unsigned int, len		)
-		__field(	__u64,  logical			)
+		__field(	__u32,  logical			)
+		__field(	__u32,	lleft			)
+		__field(	__u32,	lright			)
 		__field(	__u64,	goal			)
-		__field(	__u64,	lleft			)
-		__field(	__u64,	lright			)
 		__field(	__u64,	pleft			)
 		__field(	__u64,	pright			)
 	),
@@ -687,17 +667,13 @@ TRACE_EVENT(ext4_request_blocks,
 		__entry->pright	= ar->pright;
 	),
 
-	TP_printk("dev %d,%d ino %lu flags %u len %u lblk %llu goal %llu "
-		  "lleft %llu lright %llu pleft %llu pright %llu ",
+	TP_printk("dev %d,%d ino %lu flags %u len %u lblk %u goal %llu "
+		  "lleft %u lright %u pleft %llu pright %llu ",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  (unsigned long) __entry->ino,
-		  __entry->flags, __entry->len,
-		  (unsigned long long) __entry->logical,
-		  (unsigned long long) __entry->goal,
-		  (unsigned long long) __entry->lleft,
-		  (unsigned long long) __entry->lright,
-		  (unsigned long long) __entry->pleft,
-		  (unsigned long long) __entry->pright)
+		  (unsigned long) __entry->ino, __entry->flags,
+		  __entry->len, __entry->logical, __entry->goal,
+		  __entry->lleft, __entry->lright, __entry->pleft,
+		  __entry->pright)
 );
 
 TRACE_EVENT(ext4_allocate_blocks,
@@ -711,10 +687,10 @@ TRACE_EVENT(ext4_allocate_blocks,
 		__field(	__u64,	block			)
 		__field(	unsigned int, flags		)
 		__field(	unsigned int, len		)
-		__field(	__u64,  logical			)
+		__field(	__u32,  logical			)
+		__field(	__u32,	lleft			)
+		__field(	__u32,	lright			)
 		__field(	__u64,	goal			)
-		__field(	__u64,	lleft			)
-		__field(	__u64,	lright			)
 		__field(	__u64,	pleft			)
 		__field(	__u64,	pright			)
 	),
@@ -733,17 +709,13 @@ TRACE_EVENT(ext4_allocate_blocks,
 		__entry->pright	= ar->pright;
 	),
 
-	TP_printk("dev %d,%d ino %lu flags %u len %u block %llu lblk %llu "
-		  "goal %llu lleft %llu lright %llu pleft %llu pright %llu",
+	TP_printk("dev %d,%d ino %lu flags %u len %u block %llu lblk %u "
+		  "goal %llu lleft %u lright %u pleft %llu pright %llu",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  (unsigned long) __entry->ino,
-		  __entry->flags, __entry->len, __entry->block,
-		  (unsigned long long) __entry->logical,
-		  (unsigned long long) __entry->goal,
-		  (unsigned long long) __entry->lleft,
-		  (unsigned long long) __entry->lright,
-		  (unsigned long long) __entry->pleft,
-		  (unsigned long long) __entry->pright)
+		  (unsigned long) __entry->ino, __entry->flags,
+		  __entry->len, __entry->block, __entry->logical,
+		  __entry->goal,  __entry->lleft, __entry->lright,
+		  __entry->pleft, __entry->pright)
 );
 
 TRACE_EVENT(ext4_free_blocks,
@@ -755,10 +727,10 @@ TRACE_EVENT(ext4_free_blocks,
 	TP_STRUCT__entry(
 		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
-		__field(      umode_t, mode			)
+		__field(	umode_t, mode			)
 		__field(	__u64,	block			)
 		__field(	unsigned long,	count		)
-		__field(	 int,	flags			)
+		__field(	int,	flags			)
 	),
 
 	TP_fast_assign(
@@ -798,7 +770,7 @@ TRACE_EVENT(ext4_sync_file_enter,
 		__entry->parent		= dentry->d_parent->d_inode->i_ino;
 	),
 
-	TP_printk("dev %d,%d ino %ld parent %ld datasync %d ",
+	TP_printk("dev %d,%d ino %lu parent %lu datasync %d ",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  (unsigned long) __entry->parent, __entry->datasync)
@@ -821,7 +793,7 @@ TRACE_EVENT(ext4_sync_file_exit,
 		__entry->dev		= inode->i_sb->s_dev;
 	),
 
-	TP_printk("dev %d,%d ino %ld ret %d",
+	TP_printk("dev %d,%d ino %lu ret %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  __entry->ret)
@@ -1005,7 +977,7 @@ DECLARE_EVENT_CLASS(ext4__mballoc,
 		__entry->result_len	= len;
 	),
 
-	TP_printk("dev %d,%d inode %lu extent %u/%d/%u ",
+	TP_printk("dev %d,%d inode %lu extent %u/%d/%d ",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  __entry->result_group, __entry->result_start,
@@ -1093,7 +1065,7 @@ TRACE_EVENT(ext4_da_update_reserve_space,
 		  "allocated_meta_blocks %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
-		  __entry->mode,  (unsigned long long) __entry->i_blocks,
+		  __entry->mode, __entry->i_blocks,
 		  __entry->used_blocks, __entry->reserved_data_blocks,
 		  __entry->reserved_meta_blocks, __entry->allocated_meta_blocks)
 );
@@ -1127,7 +1099,7 @@ TRACE_EVENT(ext4_da_reserve_space,
 		  "reserved_data_blocks %d reserved_meta_blocks %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
-		  __entry->mode, (unsigned long long) __entry->i_blocks,
+		  __entry->mode, __entry->i_blocks,
 		  __entry->md_needed, __entry->reserved_data_blocks,
 		  __entry->reserved_meta_blocks)
 );
@@ -1164,7 +1136,7 @@ TRACE_EVENT(ext4_da_release_space,
 		  "allocated_meta_blocks %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
-		  __entry->mode, (unsigned long long) __entry->i_blocks,
+		  __entry->mode, __entry->i_blocks,
 		  __entry->freed_blocks, __entry->reserved_data_blocks,
 		  __entry->reserved_meta_blocks, __entry->allocated_meta_blocks)
 );
@@ -1239,14 +1211,15 @@ TRACE_EVENT(ext4_direct_IO_enter,
 		__entry->rw	= rw;
 	),
 
-	TP_printk("dev %d,%d ino %lu pos %llu len %lu rw %d",
+	TP_printk("dev %d,%d ino %lu pos %lld len %lu rw %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
-		  (unsigned long long) __entry->pos, __entry->len, __entry->rw)
+		  __entry->pos, __entry->len, __entry->rw)
 );
 
 TRACE_EVENT(ext4_direct_IO_exit,
-	TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw, int ret),
+	TP_PROTO(struct inode *inode, loff_t offset, unsigned long len,
+		 int rw, int ret),
 
 	TP_ARGS(inode, offset, len, rw, ret),
 
@@ -1268,10 +1241,10 @@ TRACE_EVENT(ext4_direct_IO_exit,
 		__entry->ret	= ret;
 	),
 
-	TP_printk("dev %d,%d ino %lu pos %llu len %lu rw %d ret %d",
+	TP_printk("dev %d,%d ino %lu pos %lld len %lu rw %d ret %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
-		  (unsigned long long) __entry->pos, __entry->len,
+		  __entry->pos, __entry->len,
 		  __entry->rw, __entry->ret)
 );
 
@@ -1296,15 +1269,15 @@ TRACE_EVENT(ext4_fallocate_enter,
 		__entry->mode	= mode;
 	),
 
-	TP_printk("dev %d,%d ino %ld pos %llu len %llu mode %d",
+	TP_printk("dev %d,%d ino %lu pos %lld len %lld mode %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  (unsigned long) __entry->ino,
-		  (unsigned long long) __entry->pos,
-		  (unsigned long long) __entry->len, __entry->mode)
+		  (unsigned long) __entry->ino, __entry->pos,
+		  __entry->len, __entry->mode)
 );
 
 TRACE_EVENT(ext4_fallocate_exit,
-	TP_PROTO(struct inode *inode, loff_t offset, unsigned int max_blocks, int ret),
+	TP_PROTO(struct inode *inode, loff_t offset,
+		 unsigned int max_blocks, int ret),
 
 	TP_ARGS(inode, offset, max_blocks, ret),
 
@@ -1312,7 +1285,7 @@ TRACE_EVENT(ext4_fallocate_exit,
 		__field(	ino_t,	ino			)
 		__field(	dev_t,	dev			)
 		__field(	loff_t,	pos			)
-		__field(	unsigned,	blocks		)
+		__field(	unsigned int,	blocks		)
 		__field(	int, 	ret			)
 	),
 
@@ -1324,10 +1297,10 @@ TRACE_EVENT(ext4_fallocate_exit,
 		__entry->ret	= ret;
 	),
 
-	TP_printk("dev %d,%d ino %ld pos %llu blocks %d ret %d",
+	TP_printk("dev %d,%d ino %lu pos %lld blocks %u ret %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
-		  (unsigned long long) __entry->pos, __entry->blocks,
+		  __entry->pos, __entry->blocks,
 		  __entry->ret)
 );
 
@@ -1350,7 +1323,7 @@ TRACE_EVENT(ext4_unlink_enter,
 		__entry->dev		= dentry->d_inode->i_sb->s_dev;
 	),
 
-	TP_printk("dev %d,%d ino %ld size %lld parent %ld",
+	TP_printk("dev %d,%d ino %lu size %lld parent %lu",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino, __entry->size,
 		  (unsigned long) __entry->parent)
@@ -1373,7 +1346,7 @@ TRACE_EVENT(ext4_unlink_exit,
 		__entry->ret		= ret;
 	),
 
-	TP_printk("dev %d,%d ino %ld ret %d",
+	TP_printk("dev %d,%d ino %lu ret %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  __entry->ret)
@@ -1387,7 +1360,7 @@ DECLARE_EVENT_CLASS(ext4__truncate,
 	TP_STRUCT__entry(
 		__field(	ino_t,  	ino		)
 		__field(	dev_t,  	dev		)
-		__field(	blkcnt_t,	blocks		)
+		__field(	__u64,		blocks		)
 	),
 
 	TP_fast_assign(
@@ -1396,9 +1369,9 @@ DECLARE_EVENT_CLASS(ext4__truncate,
 		__entry->blocks	= inode->i_blocks;
 	),
 
-	TP_printk("dev %d,%d ino %lu blocks %lu",
+	TP_printk("dev %d,%d ino %lu blocks %llu",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  (unsigned long) __entry->ino, (unsigned long) __entry->blocks)
+		  (unsigned long) __entry->ino, __entry->blocks)
 );
 
 DEFINE_EVENT(ext4__truncate, ext4_truncate_enter,
@@ -1417,7 +1390,7 @@ DEFINE_EVENT(ext4__truncate, ext4_truncate_exit,
 
 DECLARE_EVENT_CLASS(ext4__map_blocks_enter,
 	TP_PROTO(struct inode *inode, ext4_lblk_t lblk,
-		 unsigned len, unsigned flags),
+		 unsigned int len, unsigned int flags),
 
 	TP_ARGS(inode, lblk, len, flags),
 
@@ -1425,8 +1398,8 @@ DECLARE_EVENT_CLASS(ext4__map_blocks_enter,
 		__field(	ino_t,  	ino		)
 		__field(	dev_t,  	dev		)
 		__field(	ext4_lblk_t,	lblk		)
-		__field(	unsigned,	len		)
-		__field(	unsigned,	flags		)
+		__field(	unsigned int,	len		)
+		__field(	unsigned int,	flags		)
 	),
 
 	TP_fast_assign(
@@ -1440,7 +1413,7 @@ DECLARE_EVENT_CLASS(ext4__map_blocks_enter,
 	TP_printk("dev %d,%d ino %lu lblk %u len %u flags %u",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
-		  (unsigned) __entry->lblk, __entry->len, __entry->flags)
+		  __entry->lblk, __entry->len, __entry->flags)
 );
 
 DEFINE_EVENT(ext4__map_blocks_enter, ext4_ext_map_blocks_enter,
@@ -1459,7 +1432,7 @@ DEFINE_EVENT(ext4__map_blocks_enter, ext4_ind_map_blocks_enter,
 
 DECLARE_EVENT_CLASS(ext4__map_blocks_exit,
 	TP_PROTO(struct inode *inode, ext4_lblk_t lblk,
-		 ext4_fsblk_t pblk, unsigned len, int ret),
+		 ext4_fsblk_t pblk, unsigned int len, int ret),
 
 	TP_ARGS(inode, lblk, pblk, len, ret),
 
@@ -1468,7 +1441,7 @@ DECLARE_EVENT_CLASS(ext4__map_blocks_exit,
 		__field(	dev_t,		dev		)
 		__field(	ext4_lblk_t,	lblk		)
 		__field(	ext4_fsblk_t,	pblk		)
-		__field(	unsigned,	len		)
+		__field(	unsigned int,	len		)
 		__field(	int,		ret		)
 	),
 
@@ -1484,7 +1457,7 @@ DECLARE_EVENT_CLASS(ext4__map_blocks_exit,
 	TP_printk("dev %d,%d ino %lu lblk %u pblk %llu len %u ret %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
-		  (unsigned) __entry->lblk, (unsigned long long) __entry->pblk,
+		  __entry->lblk, __entry->pblk,
 		  __entry->len, __entry->ret)
 );
 
@@ -1524,7 +1497,7 @@ TRACE_EVENT(ext4_ext_load_extent,
 	TP_printk("dev %d,%d ino %lu lblk %u pblk %llu",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
-		  (unsigned) __entry->lblk, (unsigned long long) __entry->pblk)
+		  __entry->lblk, __entry->pblk)
 );
 
 TRACE_EVENT(ext4_load_inode,
-- 
cgit v1.2.3


From ea2c00095c022846dd8dfd211de05154d3e4e1b8 Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Tue, 17 May 2011 15:25:37 -0700
Subject: Connector: Set the CN_NETLINK_USERS correctly

The CN_NETLINK_USERS must be set to the highest valid index +1.
Thanks to Evgeniy for pointing this out.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Acked-by: Evgeniy Polyakov <zbr@ioremap.net>
Cc: stable <stable@kernel.org> [.39]
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/connector.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/connector.h b/include/linux/connector.h
index 7c60d0942adb..f696bccd48cb 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -44,7 +44,7 @@
 #define CN_VAL_DRBD			0x1
 #define CN_KVP_IDX			0x9	/* HyperV KVP */
 
-#define CN_NETLINK_USERS		9
+#define CN_NETLINK_USERS		10	/* Highest index + 1 */
 
 /*
  * Maximum connector's message size.
-- 
cgit v1.2.3


From 08e8138adebdd511e0955e8d6c051904bb4082af Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 13 Jun 2011 10:42:49 +0200
Subject: block: Add __attribute__((format(printf...) and fix fallout

Use the compiler to verify format strings and arguments.

Fix fallout.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-throttle.c         |  4 ++--
 block/cfq-iosched.c          | 11 ++++++-----
 include/linux/blktrace_api.h |  3 ++-
 3 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index a62be8d0dc1b..3689f833afdc 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -927,7 +927,7 @@ static int throtl_dispatch(struct request_queue *q)
 
 	bio_list_init(&bio_list_on_stack);
 
-	throtl_log(td, "dispatch nr_queued=%lu read=%u write=%u",
+	throtl_log(td, "dispatch nr_queued=%d read=%u write=%u",
 			total_nr_queued(td), td->nr_queued[READ],
 			td->nr_queued[WRITE]);
 
@@ -1204,7 +1204,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
 	}
 
 queue_bio:
-	throtl_log_tg(td, tg, "[%c] bio. bdisp=%u sz=%u bps=%llu"
+	throtl_log_tg(td, tg, "[%c] bio. bdisp=%llu sz=%u bps=%llu"
 			" iodisp=%u iops=%u queued=%d/%d",
 			rw == READ ? 'R' : 'W',
 			tg->bytes_disp[rw], bio->bi_size, tg->bps[rw],
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 545b8d4c829d..f3799432676d 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -988,9 +988,10 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
 
 	cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
 					st->min_vdisktime);
-	cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u disp=%u charge=%u iops=%u"
-			" sect=%u", used_sl, cfqq->slice_dispatch, charge,
-			iops_mode(cfqd), cfqq->nr_sectors);
+	cfq_log_cfqq(cfqq->cfqd, cfqq,
+		     "sl_used=%u disp=%u charge=%u iops=%u sect=%lu",
+		     used_sl, cfqq->slice_dispatch, charge,
+		     iops_mode(cfqd), cfqq->nr_sectors);
 	cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl,
 					  unaccounted_sl);
 	cfq_blkiocg_set_start_empty_time(&cfqg->blkg);
@@ -2023,8 +2024,8 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
 	 */
 	if (sample_valid(cic->ttime_samples) &&
 	    (cfqq->slice_end - jiffies < cic->ttime_mean)) {
-		cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%d",
-				cic->ttime_mean);
+		cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%lu",
+			     cic->ttime_mean);
 		return;
 	}
 
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index b22fb0d3db0f..8c7c2de7631a 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -169,7 +169,8 @@ extern void blk_trace_shutdown(struct request_queue *);
 extern int do_blk_trace_setup(struct request_queue *q, char *name,
 			      dev_t dev, struct block_device *bdev,
 			      struct blk_user_trace_setup *buts);
-extern void __trace_note_message(struct blk_trace *, const char *fmt, ...);
+extern __attribute__((format(printf, 2, 3)))
+void __trace_note_message(struct blk_trace *, const char *fmt, ...);
 
 /**
  * blk_add_trace_msg - Add a (simple) message to the blktrace stream
-- 
cgit v1.2.3


From e9c039052be59753e6bcc7c8b59763899dc1161c Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 13 Jun 2011 19:05:58 +0100
Subject: ASoC: Remove unused and about to be broken SND_SOC_CUSTOM I/O bus

This will be removed in -next so let's drop it from mainline as soon as
we can in order to minimise surprises.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/sound/soc.h   | 3 +--
 sound/soc/soc-cache.c | 3 ---
 2 files changed, 1 insertion(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index f1de3e0c75bc..3a4bd3a3c68d 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -248,8 +248,7 @@ typedef int (*hw_write_t)(void *,const char* ,int);
 extern struct snd_ac97_bus_ops soc_ac97_ops;
 
 enum snd_soc_control_type {
-	SND_SOC_CUSTOM = 1,
-	SND_SOC_I2C,
+	SND_SOC_I2C = 1,
 	SND_SOC_SPI,
 };
 
diff --git a/sound/soc/soc-cache.c b/sound/soc/soc-cache.c
index c005ceb70c9d..039b9532b270 100644
--- a/sound/soc/soc-cache.c
+++ b/sound/soc/soc-cache.c
@@ -409,9 +409,6 @@ int snd_soc_codec_set_cache_io(struct snd_soc_codec *codec,
 	codec->bulk_write_raw = snd_soc_hw_bulk_write_raw;
 
 	switch (control) {
-	case SND_SOC_CUSTOM:
-		break;
-
 	case SND_SOC_I2C:
 #if defined(CONFIG_I2C) || (defined(CONFIG_I2C_MODULE) && defined(MODULE))
 		codec->hw_write = (hw_write_t)i2c_master_send;
-- 
cgit v1.2.3


From de1b794130b130e77ffa975bb58cb843744f9ae5 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 13 Jun 2011 15:38:22 -0400
Subject: jbd2: Fix oops in jbd2_journal_remove_journal_head()

jbd2_journal_remove_journal_head() can oops when trying to access
journal_head returned by bh2jh(). This is caused for example by the
following race:

	TASK1					TASK2
  jbd2_journal_commit_transaction()
    ...
    processing t_forget list
      __jbd2_journal_refile_buffer(jh);
      if (!jh->b_transaction) {
        jbd_unlock_bh_state(bh);
					jbd2_journal_try_to_free_buffers()
					  jbd2_journal_grab_journal_head(bh)
					  jbd_lock_bh_state(bh)
					  __journal_try_to_free_buffer()
					  jbd2_journal_put_journal_head(jh)
        jbd2_journal_remove_journal_head(bh);

jbd2_journal_put_journal_head() in TASK2 sees that b_jcount == 0 and
buffer is not part of any transaction and thus frees journal_head
before TASK1 gets to doing so. Note that even buffer_head can be
released by try_to_free_buffers() after
jbd2_journal_put_journal_head() which adds even larger opportunity for
oops (but I didn't see this happen in reality).

Fix the problem by making transactions hold their own journal_head
reference (in b_jcount). That way we don't have to remove journal_head
explicitely via jbd2_journal_remove_journal_head() and instead just
remove journal_head when b_jcount drops to zero. The result of this is
that [__]jbd2_journal_refile_buffer(),
[__]jbd2_journal_unfile_buffer(), and
__jdb2_journal_remove_checkpoint() can free journal_head which needs
modification of a few callers. Also we have to be careful because once
journal_head is removed, buffer_head might be freed as well. So we
have to get our own buffer_head reference where it matters.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/jbd2/checkpoint.c  | 28 +++++++++-------
 fs/jbd2/commit.c      | 33 +++++++++++--------
 fs/jbd2/journal.c     | 91 ++++++++++++++++-----------------------------------
 fs/jbd2/transaction.c | 67 +++++++++++++++++++------------------
 include/linux/jbd2.h  |  2 --
 5 files changed, 99 insertions(+), 122 deletions(-)

(limited to 'include')

diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 6a79fd0a1a32..2c62c5aae82f 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -97,10 +97,14 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
 
 	if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
 	    !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
+		/*
+		 * Get our reference so that bh cannot be freed before
+		 * we unlock it
+		 */
+		get_bh(bh);
 		JBUFFER_TRACE(jh, "remove from checkpoint list");
 		ret = __jbd2_journal_remove_checkpoint(jh) + 1;
 		jbd_unlock_bh_state(bh);
-		jbd2_journal_remove_journal_head(bh);
 		BUFFER_TRACE(bh, "release");
 		__brelse(bh);
 	} else {
@@ -223,8 +227,8 @@ restart:
 			spin_lock(&journal->j_list_lock);
 			goto restart;
 		}
+		get_bh(bh);
 		if (buffer_locked(bh)) {
-			atomic_inc(&bh->b_count);
 			spin_unlock(&journal->j_list_lock);
 			jbd_unlock_bh_state(bh);
 			wait_on_buffer(bh);
@@ -243,7 +247,6 @@ restart:
 		 */
 		released = __jbd2_journal_remove_checkpoint(jh);
 		jbd_unlock_bh_state(bh);
-		jbd2_journal_remove_journal_head(bh);
 		__brelse(bh);
 	}
 
@@ -284,7 +287,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
 	int ret = 0;
 
 	if (buffer_locked(bh)) {
-		atomic_inc(&bh->b_count);
+		get_bh(bh);
 		spin_unlock(&journal->j_list_lock);
 		jbd_unlock_bh_state(bh);
 		wait_on_buffer(bh);
@@ -316,12 +319,12 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
 		ret = 1;
 		if (unlikely(buffer_write_io_error(bh)))
 			ret = -EIO;
+		get_bh(bh);
 		J_ASSERT_JH(jh, !buffer_jbddirty(bh));
 		BUFFER_TRACE(bh, "remove from checkpoint");
 		__jbd2_journal_remove_checkpoint(jh);
 		spin_unlock(&journal->j_list_lock);
 		jbd_unlock_bh_state(bh);
-		jbd2_journal_remove_journal_head(bh);
 		__brelse(bh);
 	} else {
 		/*
@@ -554,7 +557,8 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
 /*
  * journal_clean_one_cp_list
  *
- * Find all the written-back checkpoint buffers in the given list and release them.
+ * Find all the written-back checkpoint buffers in the given list and
+ * release them.
  *
  * Called with the journal locked.
  * Called with j_list_lock held.
@@ -663,8 +667,8 @@ out:
  * checkpoint lists.
  *
  * The function returns 1 if it frees the transaction, 0 otherwise.
+ * The function can free jh and bh.
  *
- * This function is called with the journal locked.
  * This function is called with j_list_lock held.
  * This function is called with jbd_lock_bh_state(jh2bh(jh))
  */
@@ -684,13 +688,14 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
 	}
 	journal = transaction->t_journal;
 
+	JBUFFER_TRACE(jh, "removing from transaction");
 	__buffer_unlink(jh);
 	jh->b_cp_transaction = NULL;
+	jbd2_journal_put_journal_head(jh);
 
 	if (transaction->t_checkpoint_list != NULL ||
 	    transaction->t_checkpoint_io_list != NULL)
 		goto out;
-	JBUFFER_TRACE(jh, "transaction has no more buffers");
 
 	/*
 	 * There is one special case to worry about: if we have just pulled the
@@ -701,10 +706,8 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
 	 * The locking here around t_state is a bit sleazy.
 	 * See the comment at the end of jbd2_journal_commit_transaction().
 	 */
-	if (transaction->t_state != T_FINISHED) {
-		JBUFFER_TRACE(jh, "belongs to running/committing transaction");
+	if (transaction->t_state != T_FINISHED)
 		goto out;
-	}
 
 	/* OK, that was the last buffer for the transaction: we can now
 	   safely remove this transaction from the log */
@@ -723,7 +726,6 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
 	wake_up(&journal->j_wait_logspace);
 	ret = 1;
 out:
-	JBUFFER_TRACE(jh, "exit");
 	return ret;
 }
 
@@ -742,6 +744,8 @@ void __jbd2_journal_insert_checkpoint(struct journal_head *jh,
 	J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
 	J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
 
+	/* Get reference for checkpointing transaction */
+	jbd2_journal_grab_journal_head(jh2bh(jh));
 	jh->b_cp_transaction = transaction;
 
 	if (!transaction->t_checkpoint_list) {
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 7f21cf3aaf92..eef6979821a4 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -848,10 +848,16 @@ restart_loop:
 	while (commit_transaction->t_forget) {
 		transaction_t *cp_transaction;
 		struct buffer_head *bh;
+		int try_to_free = 0;
 
 		jh = commit_transaction->t_forget;
 		spin_unlock(&journal->j_list_lock);
 		bh = jh2bh(jh);
+		/*
+		 * Get a reference so that bh cannot be freed before we are
+		 * done with it.
+		 */
+		get_bh(bh);
 		jbd_lock_bh_state(bh);
 		J_ASSERT_JH(jh,	jh->b_transaction == commit_transaction);
 
@@ -914,28 +920,27 @@ restart_loop:
 			__jbd2_journal_insert_checkpoint(jh, commit_transaction);
 			if (is_journal_aborted(journal))
 				clear_buffer_jbddirty(bh);
-			JBUFFER_TRACE(jh, "refile for checkpoint writeback");
-			__jbd2_journal_refile_buffer(jh);
-			jbd_unlock_bh_state(bh);
 		} else {
 			J_ASSERT_BH(bh, !buffer_dirty(bh));
-			/* The buffer on BJ_Forget list and not jbddirty means
+			/*
+			 * The buffer on BJ_Forget list and not jbddirty means
 			 * it has been freed by this transaction and hence it
 			 * could not have been reallocated until this
 			 * transaction has committed. *BUT* it could be
 			 * reallocated once we have written all the data to
 			 * disk and before we process the buffer on BJ_Forget
-			 * list. */
-			JBUFFER_TRACE(jh, "refile or unfile freed buffer");
-			__jbd2_journal_refile_buffer(jh);
-			if (!jh->b_transaction) {
-				jbd_unlock_bh_state(bh);
-				 /* needs a brelse */
-				jbd2_journal_remove_journal_head(bh);
-				release_buffer_page(bh);
-			} else
-				jbd_unlock_bh_state(bh);
+			 * list.
+			 */
+			if (!jh->b_next_transaction)
+				try_to_free = 1;
 		}
+		JBUFFER_TRACE(jh, "refile or unfile buffer");
+		__jbd2_journal_refile_buffer(jh);
+		jbd_unlock_bh_state(bh);
+		if (try_to_free)
+			release_buffer_page(bh);	/* Drops bh reference */
+		else
+			__brelse(bh);
 		cond_resched_lock(&journal->j_list_lock);
 	}
 	spin_unlock(&journal->j_list_lock);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 9a7826990304..0dfa5b598e68 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2078,10 +2078,9 @@ static void journal_free_journal_head(struct journal_head *jh)
  * When a buffer has its BH_JBD bit set it is immune from being released by
  * core kernel code, mainly via ->b_count.
  *
- * A journal_head may be detached from its buffer_head when the journal_head's
- * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL.
- * Various places in JBD call jbd2_journal_remove_journal_head() to indicate that the
- * journal_head can be dropped if needed.
+ * A journal_head is detached from its buffer_head when the journal_head's
+ * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint
+ * transaction (b_cp_transaction) hold their references to b_jcount.
  *
  * Various places in the kernel want to attach a journal_head to a buffer_head
  * _before_ attaching the journal_head to a transaction.  To protect the
@@ -2094,17 +2093,16 @@ static void journal_free_journal_head(struct journal_head *jh)
  *	(Attach a journal_head if needed.  Increments b_jcount)
  *	struct journal_head *jh = jbd2_journal_add_journal_head(bh);
  *	...
+ *      (Get another reference for transaction)
+ *	jbd2_journal_grab_journal_head(bh);
  *	jh->b_transaction = xxx;
+ *	(Put original reference)
  *	jbd2_journal_put_journal_head(jh);
- *
- * Now, the journal_head's b_jcount is zero, but it is safe from being released
- * because it has a non-zero b_transaction.
  */
 
 /*
  * Give a buffer_head a journal_head.
  *
- * Doesn't need the journal lock.
  * May sleep.
  */
 struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh)
@@ -2168,61 +2166,29 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
 	struct journal_head *jh = bh2jh(bh);
 
 	J_ASSERT_JH(jh, jh->b_jcount >= 0);
-
-	get_bh(bh);
-	if (jh->b_jcount == 0) {
-		if (jh->b_transaction == NULL &&
-				jh->b_next_transaction == NULL &&
-				jh->b_cp_transaction == NULL) {
-			J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
-			J_ASSERT_BH(bh, buffer_jbd(bh));
-			J_ASSERT_BH(bh, jh2bh(jh) == bh);
-			BUFFER_TRACE(bh, "remove journal_head");
-			if (jh->b_frozen_data) {
-				printk(KERN_WARNING "%s: freeing "
-						"b_frozen_data\n",
-						__func__);
-				jbd2_free(jh->b_frozen_data, bh->b_size);
-			}
-			if (jh->b_committed_data) {
-				printk(KERN_WARNING "%s: freeing "
-						"b_committed_data\n",
-						__func__);
-				jbd2_free(jh->b_committed_data, bh->b_size);
-			}
-			bh->b_private = NULL;
-			jh->b_bh = NULL;	/* debug, really */
-			clear_buffer_jbd(bh);
-			__brelse(bh);
-			journal_free_journal_head(jh);
-		} else {
-			BUFFER_TRACE(bh, "journal_head was locked");
-		}
+	J_ASSERT_JH(jh, jh->b_transaction == NULL);
+	J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
+	J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
+	J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
+	J_ASSERT_BH(bh, buffer_jbd(bh));
+	J_ASSERT_BH(bh, jh2bh(jh) == bh);
+	BUFFER_TRACE(bh, "remove journal_head");
+	if (jh->b_frozen_data) {
+		printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
+		jbd2_free(jh->b_frozen_data, bh->b_size);
 	}
+	if (jh->b_committed_data) {
+		printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
+		jbd2_free(jh->b_committed_data, bh->b_size);
+	}
+	bh->b_private = NULL;
+	jh->b_bh = NULL;	/* debug, really */
+	clear_buffer_jbd(bh);
+	journal_free_journal_head(jh);
 }
 
 /*
- * jbd2_journal_remove_journal_head(): if the buffer isn't attached to a transaction
- * and has a zero b_jcount then remove and release its journal_head.   If we did
- * see that the buffer is not used by any transaction we also "logically"
- * decrement ->b_count.
- *
- * We in fact take an additional increment on ->b_count as a convenience,
- * because the caller usually wants to do additional things with the bh
- * after calling here.
- * The caller of jbd2_journal_remove_journal_head() *must* run __brelse(bh) at some
- * time.  Once the caller has run __brelse(), the buffer is eligible for
- * reaping by try_to_free_buffers().
- */
-void jbd2_journal_remove_journal_head(struct buffer_head *bh)
-{
-	jbd_lock_bh_journal_head(bh);
-	__journal_remove_journal_head(bh);
-	jbd_unlock_bh_journal_head(bh);
-}
-
-/*
- * Drop a reference on the passed journal_head.  If it fell to zero then try to
+ * Drop a reference on the passed journal_head.  If it fell to zero then
  * release the journal_head from the buffer_head.
  */
 void jbd2_journal_put_journal_head(struct journal_head *jh)
@@ -2232,11 +2198,12 @@ void jbd2_journal_put_journal_head(struct journal_head *jh)
 	jbd_lock_bh_journal_head(bh);
 	J_ASSERT_JH(jh, jh->b_jcount > 0);
 	--jh->b_jcount;
-	if (!jh->b_jcount && !jh->b_transaction) {
+	if (!jh->b_jcount) {
 		__journal_remove_journal_head(bh);
+		jbd_unlock_bh_journal_head(bh);
 		__brelse(bh);
-	}
-	jbd_unlock_bh_journal_head(bh);
+	} else
+		jbd_unlock_bh_journal_head(bh);
 }
 
 /*
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 547b101049e5..2d7109414cdd 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -30,6 +30,7 @@
 #include <linux/module.h>
 
 static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
+static void __jbd2_journal_unfile_buffer(struct journal_head *jh);
 
 /*
  * jbd2_get_transaction: obtain a new transaction_t object.
@@ -764,7 +765,6 @@ repeat:
 	if (!jh->b_transaction) {
 		JBUFFER_TRACE(jh, "no transaction");
 		J_ASSERT_JH(jh, !jh->b_next_transaction);
-		jh->b_transaction = transaction;
 		JBUFFER_TRACE(jh, "file as BJ_Reserved");
 		spin_lock(&journal->j_list_lock);
 		__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
@@ -895,8 +895,6 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
 		 * committed and so it's safe to clear the dirty bit.
 		 */
 		clear_buffer_dirty(jh2bh(jh));
-		jh->b_transaction = transaction;
-
 		/* first access by this transaction */
 		jh->b_modified = 0;
 
@@ -1230,8 +1228,6 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
 			__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
 		} else {
 			__jbd2_journal_unfile_buffer(jh);
-			jbd2_journal_remove_journal_head(bh);
-			__brelse(bh);
 			if (!buffer_jbd(bh)) {
 				spin_unlock(&journal->j_list_lock);
 				jbd_unlock_bh_state(bh);
@@ -1554,19 +1550,32 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
 		mark_buffer_dirty(bh);	/* Expose it to the VM */
 }
 
-void __jbd2_journal_unfile_buffer(struct journal_head *jh)
+/*
+ * Remove buffer from all transactions.
+ *
+ * Called with bh_state lock and j_list_lock
+ *
+ * jh and bh may be already freed when this function returns.
+ */
+static void __jbd2_journal_unfile_buffer(struct journal_head *jh)
 {
 	__jbd2_journal_temp_unlink_buffer(jh);
 	jh->b_transaction = NULL;
+	jbd2_journal_put_journal_head(jh);
 }
 
 void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
 {
-	jbd_lock_bh_state(jh2bh(jh));
+	struct buffer_head *bh = jh2bh(jh);
+
+	/* Get reference so that buffer cannot be freed before we unlock it */
+	get_bh(bh);
+	jbd_lock_bh_state(bh);
 	spin_lock(&journal->j_list_lock);
 	__jbd2_journal_unfile_buffer(jh);
 	spin_unlock(&journal->j_list_lock);
-	jbd_unlock_bh_state(jh2bh(jh));
+	jbd_unlock_bh_state(bh);
+	__brelse(bh);
 }
 
 /*
@@ -1593,8 +1602,6 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
 		if (jh->b_jlist == BJ_None) {
 			JBUFFER_TRACE(jh, "remove from checkpoint list");
 			__jbd2_journal_remove_checkpoint(jh);
-			jbd2_journal_remove_journal_head(bh);
-			__brelse(bh);
 		}
 	}
 	spin_unlock(&journal->j_list_lock);
@@ -1657,7 +1664,6 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
 		/*
 		 * We take our own ref against the journal_head here to avoid
 		 * having to add tons of locking around each instance of
-		 * jbd2_journal_remove_journal_head() and
 		 * jbd2_journal_put_journal_head().
 		 */
 		jh = jbd2_journal_grab_journal_head(bh);
@@ -1695,10 +1701,9 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
 	int may_free = 1;
 	struct buffer_head *bh = jh2bh(jh);
 
-	__jbd2_journal_unfile_buffer(jh);
-
 	if (jh->b_cp_transaction) {
 		JBUFFER_TRACE(jh, "on running+cp transaction");
+		__jbd2_journal_temp_unlink_buffer(jh);
 		/*
 		 * We don't want to write the buffer anymore, clear the
 		 * bit so that we don't confuse checks in
@@ -1709,8 +1714,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
 		may_free = 0;
 	} else {
 		JBUFFER_TRACE(jh, "on running transaction");
-		jbd2_journal_remove_journal_head(bh);
-		__brelse(bh);
+		__jbd2_journal_unfile_buffer(jh);
 	}
 	return may_free;
 }
@@ -1988,6 +1992,8 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
 
 	if (jh->b_transaction)
 		__jbd2_journal_temp_unlink_buffer(jh);
+	else
+		jbd2_journal_grab_journal_head(bh);
 	jh->b_transaction = transaction;
 
 	switch (jlist) {
@@ -2039,9 +2045,10 @@ void jbd2_journal_file_buffer(struct journal_head *jh,
  * already started to be used by a subsequent transaction, refile the
  * buffer on that transaction's metadata list.
  *
- * Called under journal->j_list_lock
- *
+ * Called under j_list_lock
  * Called under jbd_lock_bh_state(jh2bh(jh))
+ *
+ * jh and bh may be already free when this function returns
  */
 void __jbd2_journal_refile_buffer(struct journal_head *jh)
 {
@@ -2065,6 +2072,11 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
 
 	was_dirty = test_clear_buffer_jbddirty(bh);
 	__jbd2_journal_temp_unlink_buffer(jh);
+	/*
+	 * We set b_transaction here because b_next_transaction will inherit
+	 * our jh reference and thus __jbd2_journal_file_buffer() must not
+	 * take a new one.
+	 */
 	jh->b_transaction = jh->b_next_transaction;
 	jh->b_next_transaction = NULL;
 	if (buffer_freed(bh))
@@ -2081,30 +2093,21 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
 }
 
 /*
- * For the unlocked version of this call, also make sure that any
- * hanging journal_head is cleaned up if necessary.
- *
- * __jbd2_journal_refile_buffer is usually called as part of a single locked
- * operation on a buffer_head, in which the caller is probably going to
- * be hooking the journal_head onto other lists.  In that case it is up
- * to the caller to remove the journal_head if necessary.  For the
- * unlocked jbd2_journal_refile_buffer call, the caller isn't going to be
- * doing anything else to the buffer so we need to do the cleanup
- * ourselves to avoid a jh leak.
- *
- * *** The journal_head may be freed by this call! ***
+ * __jbd2_journal_refile_buffer() with necessary locking added. We take our
+ * bh reference so that we can safely unlock bh.
+ *
+ * The jh and bh may be freed by this call.
  */
 void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
 {
 	struct buffer_head *bh = jh2bh(jh);
 
+	/* Get reference so that buffer cannot be freed before we unlock it */
+	get_bh(bh);
 	jbd_lock_bh_state(bh);
 	spin_lock(&journal->j_list_lock);
-
 	__jbd2_journal_refile_buffer(jh);
 	jbd_unlock_bh_state(bh);
-	jbd2_journal_remove_journal_head(bh);
-
 	spin_unlock(&journal->j_list_lock);
 	__brelse(bh);
 }
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 4ecb7b16b278..d087c2e7b2aa 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1024,7 +1024,6 @@ struct journal_s
 
 /* Filing buffers */
 extern void jbd2_journal_unfile_buffer(journal_t *, struct journal_head *);
-extern void __jbd2_journal_unfile_buffer(struct journal_head *);
 extern void __jbd2_journal_refile_buffer(struct journal_head *);
 extern void jbd2_journal_refile_buffer(journal_t *, struct journal_head *);
 extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int);
@@ -1165,7 +1164,6 @@ extern void	   jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_in
  */
 struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh);
 struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh);
-void jbd2_journal_remove_journal_head(struct buffer_head *bh);
 void jbd2_journal_put_journal_head(struct journal_head *jh);
 
 /*
-- 
cgit v1.2.3


From 0b760113a3a155269a3fba93a409c640031dd68f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 31 May 2011 15:15:34 -0400
Subject: NLM: Don't hang forever on NLM unlock requests

If the NLM daemon is killed on the NFS server, we can currently end up
hanging forever on an 'unlock' request, instead of aborting. Basically,
if the rpcbind request fails, or the server keeps returning garbage, we
really want to quit instead of retrying.

Tested-by: Vasily Averin <vvs@sw.ru>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@kernel.org
---
 fs/lockd/clntproc.c          | 8 +++++++-
 include/linux/sunrpc/sched.h | 3 ++-
 net/sunrpc/clnt.c            | 3 +++
 net/sunrpc/sched.c           | 1 +
 4 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index adb45ec9038c..e374050a911c 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -708,7 +708,13 @@ static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
 
 	if (task->tk_status < 0) {
 		dprintk("lockd: unlock failed (err = %d)\n", -task->tk_status);
-		goto retry_rebind;
+		switch (task->tk_status) {
+		case -EACCES:
+		case -EIO:
+			goto die;
+		default:
+			goto retry_rebind;
+		}
 	}
 	if (status == NLM_LCK_DENIED_GRACE_PERIOD) {
 		rpc_delay(task, NLMCLNT_GRACE_WAIT);
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index f73c482ec9c6..fe2d8e6b923b 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -84,7 +84,8 @@ struct rpc_task {
 #endif
 	unsigned char		tk_priority : 2,/* Task priority */
 				tk_garb_retry : 2,
-				tk_cred_retry : 2;
+				tk_cred_retry : 2,
+				tk_rebind_retry : 2;
 };
 #define tk_xprt			tk_client->cl_xprt
 
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index b84d7395535e..566bcfd067f6 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1175,6 +1175,9 @@ call_bind_status(struct rpc_task *task)
 			status = -EOPNOTSUPP;
 			break;
 		}
+		if (task->tk_rebind_retry == 0)
+			break;
+		task->tk_rebind_retry--;
 		rpc_delay(task, 3*HZ);
 		goto retry_timeout;
 	case -ETIMEDOUT:
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 6b43ee7221d5..a27406b1654f 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -792,6 +792,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
 	/* Initialize retry counters */
 	task->tk_garb_retry = 2;
 	task->tk_cred_retry = 2;
+	task->tk_rebind_retry = 2;
 
 	task->tk_priority = task_setup_data->priority - RPC_PRIORITY_LOW;
 	task->tk_owner = current->tgid;
-- 
cgit v1.2.3


From c9c30dd5f73dccaa326a54dfcf490316946aea87 Mon Sep 17 00:00:00 2001
From: Benny Halevy <benny@tonian.com>
Date: Sat, 11 Jun 2011 17:08:39 -0400
Subject: NFSv4.1: deprecate headerpadsz in CREATE_SESSION

We don't support header padding yet so better off ditching it

Reported-by: Sid Moore <learnmost@gmail.com>
Signed-off-by: Benny Halevy <benny@tonian.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       |  4 ----
 fs/nfs/nfs4xdr.c        | 10 ++++++----
 include/linux/nfs_xdr.h |  1 -
 3 files changed, 6 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d2c4b59c896d..3ca449789545 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5098,7 +5098,6 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args)
 	if (mxresp_sz == 0)
 		mxresp_sz = NFS_MAX_FILE_IO_SIZE;
 	/* Fore channel attributes */
-	args->fc_attrs.headerpadsz = 0;
 	args->fc_attrs.max_rqst_sz = mxrqst_sz;
 	args->fc_attrs.max_resp_sz = mxresp_sz;
 	args->fc_attrs.max_ops = NFS4_MAX_OPS;
@@ -5111,7 +5110,6 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args)
 		args->fc_attrs.max_ops, args->fc_attrs.max_reqs);
 
 	/* Back channel attributes */
-	args->bc_attrs.headerpadsz = 0;
 	args->bc_attrs.max_rqst_sz = PAGE_SIZE;
 	args->bc_attrs.max_resp_sz = PAGE_SIZE;
 	args->bc_attrs.max_resp_sz_cached = 0;
@@ -5131,8 +5129,6 @@ static int nfs4_verify_fore_channel_attrs(struct nfs41_create_session_args *args
 	struct nfs4_channel_attrs *sent = &args->fc_attrs;
 	struct nfs4_channel_attrs *rcvd = &session->fc_attrs;
 
-	if (rcvd->headerpadsz > sent->headerpadsz)
-		return -EINVAL;
 	if (rcvd->max_resp_sz > sent->max_resp_sz)
 		return -EINVAL;
 	/*
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index d869a5e5464b..c4b7d6c04948 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1725,7 +1725,7 @@ static void encode_create_session(struct xdr_stream *xdr,
 	*p++ = cpu_to_be32(args->flags);			/*flags */
 
 	/* Fore Channel */
-	*p++ = cpu_to_be32(args->fc_attrs.headerpadsz);	/* header padding size */
+	*p++ = cpu_to_be32(0);				/* header padding size */
 	*p++ = cpu_to_be32(args->fc_attrs.max_rqst_sz);	/* max req size */
 	*p++ = cpu_to_be32(args->fc_attrs.max_resp_sz);	/* max resp size */
 	*p++ = cpu_to_be32(max_resp_sz_cached);		/* Max resp sz cached */
@@ -1734,7 +1734,7 @@ static void encode_create_session(struct xdr_stream *xdr,
 	*p++ = cpu_to_be32(0);				/* rdmachannel_attrs */
 
 	/* Back Channel */
-	*p++ = cpu_to_be32(args->fc_attrs.headerpadsz);	/* header padding size */
+	*p++ = cpu_to_be32(0);				/* header padding size */
 	*p++ = cpu_to_be32(args->bc_attrs.max_rqst_sz);	/* max req size */
 	*p++ = cpu_to_be32(args->bc_attrs.max_resp_sz);	/* max resp size */
 	*p++ = cpu_to_be32(args->bc_attrs.max_resp_sz_cached);	/* Max resp sz cached */
@@ -4997,12 +4997,14 @@ static int decode_chan_attrs(struct xdr_stream *xdr,
 			     struct nfs4_channel_attrs *attrs)
 {
 	__be32 *p;
-	u32 nr_attrs;
+	u32 nr_attrs, val;
 
 	p = xdr_inline_decode(xdr, 28);
 	if (unlikely(!p))
 		goto out_overflow;
-	attrs->headerpadsz = be32_to_cpup(p++);
+	val = be32_to_cpup(p++);	/* headerpadsz */
+	if (val)
+		return -EINVAL;		/* no support for header padding yet */
 	attrs->max_rqst_sz = be32_to_cpup(p++);
 	attrs->max_resp_sz = be32_to_cpup(p++);
 	attrs->max_resp_sz_cached = be32_to_cpup(p++);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 5e8444a11adf..00848d86ffb2 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -158,7 +158,6 @@ struct nfs_seqid;
 
 /* nfs41 sessions channel attributes */
 struct nfs4_channel_attrs {
-	u32			headerpadsz;
 	u32			max_rqst_sz;
 	u32			max_resp_sz;
 	u32			max_resp_sz_cached;
-- 
cgit v1.2.3


From c16d51a32bbb61ac8fd96f78b5ce2fccfe0fb4c3 Mon Sep 17 00:00:00 2001
From: Shreshtha Kumar Sahu <shreshthakumar.sahu@stericsson.com>
Date: Mon, 13 Jun 2011 10:11:33 +0200
Subject: amba pl011: workaround for uart registers lockup

This workaround aims to break the deadlock situation
which raises during continuous transfer of data for long
duration over uart with hardware flow control. It is
observed that CTS interrupt cannot be cleared in uart
interrupt register (ICR). Hence further transfer over
uart gets blocked.

It is seen that during such deadlock condition ICR
don't get cleared even on multiple write. This leads
pass_counter to decrease and finally reach zero. This
can be taken as trigger point to run this UART_BT_WA.

Workaround backups the register configuration, does soft
reset of UART using BIT-0 of PRCC_K_SOFTRST_SET/CLEAR
registers and restores the registers.

This patch also provides support for uart init and exit
function calls if present.

Signed-off-by: Shreshtha Kumar Sahu <shreshthakumar.sahu@stericsson.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/tty/serial/amba-pl011.c | 123 +++++++++++++++++++++++++++++++++++++++-
 include/linux/amba/serial.h     |   3 +
 2 files changed, 125 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index 8dc0541feecc..f5f6831b0a64 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -50,6 +50,7 @@
 #include <linux/dmaengine.h>
 #include <linux/dma-mapping.h>
 #include <linux/scatterlist.h>
+#include <linux/delay.h>
 
 #include <asm/io.h>
 #include <asm/sizes.h>
@@ -65,6 +66,30 @@
 #define UART_DR_ERROR		(UART011_DR_OE|UART011_DR_BE|UART011_DR_PE|UART011_DR_FE)
 #define UART_DUMMY_DR_RX	(1 << 16)
 
+
+#define UART_WA_SAVE_NR 14
+
+static void pl011_lockup_wa(unsigned long data);
+static const u32 uart_wa_reg[UART_WA_SAVE_NR] = {
+	ST_UART011_DMAWM,
+	ST_UART011_TIMEOUT,
+	ST_UART011_LCRH_RX,
+	UART011_IBRD,
+	UART011_FBRD,
+	ST_UART011_LCRH_TX,
+	UART011_IFLS,
+	ST_UART011_XFCR,
+	ST_UART011_XON1,
+	ST_UART011_XON2,
+	ST_UART011_XOFF1,
+	ST_UART011_XOFF2,
+	UART011_CR,
+	UART011_IMSC
+};
+
+static u32 uart_wa_regdata[UART_WA_SAVE_NR];
+static DECLARE_TASKLET(pl011_lockup_tlet, pl011_lockup_wa, 0);
+
 /* There is by now at least one vendor with differing details, so handle it */
 struct vendor_data {
 	unsigned int		ifls;
@@ -72,6 +97,7 @@ struct vendor_data {
 	unsigned int		lcrh_tx;
 	unsigned int		lcrh_rx;
 	bool			oversampling;
+	bool			interrupt_may_hang;   /* vendor-specific */
 	bool			dma_threshold;
 };
 
@@ -90,9 +116,12 @@ static struct vendor_data vendor_st = {
 	.lcrh_tx		= ST_UART011_LCRH_TX,
 	.lcrh_rx		= ST_UART011_LCRH_RX,
 	.oversampling		= true,
+	.interrupt_may_hang	= true,
 	.dma_threshold		= true,
 };
 
+static struct uart_amba_port *amba_ports[UART_NR];
+
 /* Deals with DMA transactions */
 
 struct pl011_sgbuf {
@@ -132,6 +161,7 @@ struct uart_amba_port {
 	unsigned int		lcrh_rx;	/* vendor-specific */
 	bool			autorts;
 	char			type[12];
+	bool			interrupt_may_hang; /* vendor-specific */
 #ifdef CONFIG_DMA_ENGINE
 	/* DMA stuff */
 	bool			using_tx_dma;
@@ -1008,6 +1038,68 @@ static inline bool pl011_dma_rx_running(struct uart_amba_port *uap)
 #endif
 
 
+/*
+ * pl011_lockup_wa
+ * This workaround aims to break the deadlock situation
+ * when after long transfer over uart in hardware flow
+ * control, uart interrupt registers cannot be cleared.
+ * Hence uart transfer gets blocked.
+ *
+ * It is seen that during such deadlock condition ICR
+ * don't get cleared even on multiple write. This leads
+ * pass_counter to decrease and finally reach zero. This
+ * can be taken as trigger point to run this UART_BT_WA.
+ *
+ */
+static void pl011_lockup_wa(unsigned long data)
+{
+	struct uart_amba_port *uap = amba_ports[0];
+	void __iomem *base = uap->port.membase;
+	struct circ_buf *xmit = &uap->port.state->xmit;
+	struct tty_struct *tty = uap->port.state->port.tty;
+	int buf_empty_retries = 200;
+	int loop;
+
+	/* Stop HCI layer from submitting data for tx */
+	tty->hw_stopped = 1;
+	while (!uart_circ_empty(xmit)) {
+		if (buf_empty_retries-- == 0)
+			break;
+		udelay(100);
+	}
+
+	/* Backup registers */
+	for (loop = 0; loop < UART_WA_SAVE_NR; loop++)
+		uart_wa_regdata[loop] = readl(base + uart_wa_reg[loop]);
+
+	/* Disable UART so that FIFO data is flushed out */
+	writew(0x00, uap->port.membase + UART011_CR);
+
+	/* Soft reset UART module */
+	if (uap->port.dev->platform_data) {
+		struct amba_pl011_data *plat;
+
+		plat = uap->port.dev->platform_data;
+		if (plat->reset)
+			plat->reset();
+	}
+
+	/* Restore registers */
+	for (loop = 0; loop < UART_WA_SAVE_NR; loop++)
+		writew(uart_wa_regdata[loop] ,
+				uap->port.membase + uart_wa_reg[loop]);
+
+	/* Initialise the old status of the modem signals */
+	uap->old_status = readw(uap->port.membase + UART01x_FR) &
+		UART01x_FR_MODEM_ANY;
+
+	if (readl(base + UART011_MIS) & 0x2)
+		printk(KERN_EMERG "UART_BT_WA: ***FAILED***\n");
+
+	/* Start Tx/Rx */
+	tty->hw_stopped = 0;
+}
+
 static void pl011_stop_tx(struct uart_port *port)
 {
 	struct uart_amba_port *uap = (struct uart_amba_port *)port;
@@ -1158,8 +1250,11 @@ static irqreturn_t pl011_int(int irq, void *dev_id)
 			if (status & UART011_TXIS)
 				pl011_tx_chars(uap);
 
-			if (pass_counter-- == 0)
+			if (pass_counter-- == 0) {
+				if (uap->interrupt_may_hang)
+					tasklet_schedule(&pl011_lockup_tlet);
 				break;
+			}
 
 			status = readw(uap->port.membase + UART011_MIS);
 		} while (status != 0);
@@ -1339,6 +1434,14 @@ static int pl011_startup(struct uart_port *port)
 	writew(uap->im, uap->port.membase + UART011_IMSC);
 	spin_unlock_irq(&uap->port.lock);
 
+	if (uap->port.dev->platform_data) {
+		struct amba_pl011_data *plat;
+
+		plat = uap->port.dev->platform_data;
+		if (plat->init)
+			plat->init();
+	}
+
 	return 0;
 
  clk_dis:
@@ -1394,6 +1497,15 @@ static void pl011_shutdown(struct uart_port *port)
 	 * Shut down the clock producer
 	 */
 	clk_disable(uap->clk);
+
+	if (uap->port.dev->platform_data) {
+		struct amba_pl011_data *plat;
+
+		plat = uap->port.dev->platform_data;
+		if (plat->exit)
+			plat->exit();
+	}
+
 }
 
 static void
@@ -1700,6 +1812,14 @@ static int __init pl011_console_setup(struct console *co, char *options)
 	if (!uap)
 		return -ENODEV;
 
+	if (uap->port.dev->platform_data) {
+		struct amba_pl011_data *plat;
+
+		plat = uap->port.dev->platform_data;
+		if (plat->init)
+			plat->init();
+	}
+
 	uap->port.uartclk = clk_get_rate(uap->clk);
 
 	if (options)
@@ -1774,6 +1894,7 @@ static int pl011_probe(struct amba_device *dev, const struct amba_id *id)
 	uap->lcrh_rx = vendor->lcrh_rx;
 	uap->lcrh_tx = vendor->lcrh_tx;
 	uap->fifosize = vendor->fifosize;
+	uap->interrupt_may_hang = vendor->interrupt_may_hang;
 	uap->port.dev = &dev->dev;
 	uap->port.mapbase = dev->res.start;
 	uap->port.membase = base;
diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h
index 5479fdc849e9..514ed45c462e 100644
--- a/include/linux/amba/serial.h
+++ b/include/linux/amba/serial.h
@@ -201,6 +201,9 @@ struct amba_pl011_data {
 	bool (*dma_filter)(struct dma_chan *chan, void *filter_param);
 	void *dma_rx_param;
 	void *dma_tx_param;
+        void (*init) (void);
+	void (*exit) (void);
+	void (*reset) (void);
 };
 #endif
 
-- 
cgit v1.2.3


From be98ca652faa6468916a9b7608befff215a8ca70 Mon Sep 17 00:00:00 2001
From: Manoj Iyer <manoj.iyer@canonical.com>
Date: Thu, 26 May 2011 11:19:05 -0500
Subject: mmc: Add PCI fixup quirks for Ricoh 1180:e823 reader

Signed-off-by: Manoj Iyer <manoj.iyer@canonical.com>
Cc: <stable@kernel.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/pci/quirks.c    | 2 ++
 include/linux/pci_ids.h | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include')

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index e8a140669f90..02145e9697a9 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -2761,6 +2761,8 @@ static void ricoh_mmc_fixup_r5c832(struct pci_dev *dev)
 }
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832);
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5CE823, ricoh_mmc_fixup_r5c832);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5CE823, ricoh_mmc_fixup_r5c832);
 #endif /*CONFIG_MMC_RICOH_MMC*/
 
 #if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP)
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index a311008af5e1..f8910e155566 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1537,6 +1537,7 @@
 #define PCI_DEVICE_ID_RICOH_RL5C476	0x0476
 #define PCI_DEVICE_ID_RICOH_RL5C478	0x0478
 #define PCI_DEVICE_ID_RICOH_R5C822	0x0822
+#define PCI_DEVICE_ID_RICOH_R5CE823	0xe823
 #define PCI_DEVICE_ID_RICOH_R5C832	0x0832
 #define PCI_DEVICE_ID_RICOH_R5C843	0x0843
 
-- 
cgit v1.2.3


From 155d109b5f52ffd749219b27702462dcd9cf4f8d Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@gmail.com>
Date: Mon, 20 Jun 2011 13:23:14 +0200
Subject: block: add REQ_SECURE to REQ_COMMON_MASK

Add REQ_SECURE flag to REQ_COMMON_MASK so that
init_request_from_bio() can pass it to @req->cmd_flags.

Signed-off-by: Namhyung Kim <namhyung@gmail.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: stable@kernel.org # 2.6.36 and newer
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 include/linux/blk_types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 2a7cea53ca0d..6395692b2e7a 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -167,7 +167,7 @@ enum rq_flag_bits {
 	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
 #define REQ_COMMON_MASK \
 	(REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_DISCARD | \
-	 REQ_NOIDLE | REQ_FLUSH | REQ_FUA)
+	 REQ_NOIDLE | REQ_FLUSH | REQ_FUA | REQ_SECURE)
 #define REQ_CLONE_MASK		REQ_COMMON_MASK
 
 #define REQ_RAHEAD		(1 << __REQ_RAHEAD)
-- 
cgit v1.2.3


From 19345cb299e8234006c5125151ab723e851a1d24 Mon Sep 17 00:00:00 2001
From: Benny Halevy <benny@tonian.com>
Date: Sun, 19 Jun 2011 18:33:46 -0400
Subject: NFSv4.1: file layout must consider pg_bsize for coalescing

Otherwise we end up overflowing the rpc buffer size on the receive end.

Signed-off-by: Benny Halevy <benny@tonian.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4filelayout.c  | 6 ++++--
 fs/nfs/pagelist.c        | 3 ++-
 include/linux/nfs_page.h | 3 +++
 3 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 5d6f369b15d0..0bafcc91c27f 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -30,6 +30,7 @@
  */
 
 #include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
 
 #include "internal.h"
 #include "nfs4filelayout.h"
@@ -666,8 +667,9 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 	u64 p_stripe, r_stripe;
 	u32 stripe_unit;
 
-	if (!pnfs_generic_pg_test(pgio, prev, req))
-		return 0;
+	if (!pnfs_generic_pg_test(pgio, prev, req) ||
+	    !nfs_generic_pg_test(pgio, prev, req))
+		return false;
 
 	if (!pgio->pg_lseg)
 		return 1;
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 7913961aff22..009855716286 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -204,7 +204,7 @@ nfs_wait_on_request(struct nfs_page *req)
 			TASK_UNINTERRUPTIBLE);
 }
 
-static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
+bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
 {
 	/*
 	 * FIXME: ideally we should be able to coalesce all requests
@@ -218,6 +218,7 @@ static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_p
 
 	return desc->pg_count + req->wb_bytes <= desc->pg_bsize;
 }
+EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
 
 /**
  * nfs_pageio_init - initialise a page io descriptor
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 3a34e80ae92f..25311b3bedf8 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -92,6 +92,9 @@ extern	int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
 				   struct nfs_page *);
 extern	void nfs_pageio_complete(struct nfs_pageio_descriptor *desc);
 extern	void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t);
+extern bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
+				struct nfs_page *prev,
+				struct nfs_page *req);
 extern  int nfs_wait_on_request(struct nfs_page *);
 extern	void nfs_unlock_request(struct nfs_page *req);
 extern	int nfs_set_page_tag_locked(struct nfs_page *req);
-- 
cgit v1.2.3


From f76b168b6f117a49d36307053e1acbe30580ea5b Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Sat, 18 Jun 2011 20:22:23 +0200
Subject: PM: Rename dev_pm_info.in_suspend to is_prepared

This patch (as1473) renames the "in_suspend" field in struct
dev_pm_info to "is_prepared", in preparation for an upcoming change.
The new name is more descriptive of what the field really means.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: stable@kernel.org
---
 drivers/base/power/main.c | 14 +++++++++-----
 drivers/usb/core/driver.c |  6 +++---
 include/linux/device.h    |  4 ++--
 include/linux/pm.h        |  2 +-
 4 files changed, 15 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index aa6320207745..bf5a59ac1957 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -57,7 +57,7 @@ static int async_error;
  */
 void device_pm_init(struct device *dev)
 {
-	dev->power.in_suspend = false;
+	dev->power.is_prepared = false;
 	init_completion(&dev->power.completion);
 	complete_all(&dev->power.completion);
 	dev->power.wakeup = NULL;
@@ -91,7 +91,7 @@ void device_pm_add(struct device *dev)
 	pr_debug("PM: Adding info for %s:%s\n",
 		 dev->bus ? dev->bus->name : "No Bus", dev_name(dev));
 	mutex_lock(&dpm_list_mtx);
-	if (dev->parent && dev->parent->power.in_suspend)
+	if (dev->parent && dev->parent->power.is_prepared)
 		dev_warn(dev, "parent %s should not be sleeping\n",
 			dev_name(dev->parent));
 	list_add_tail(&dev->power.entry, &dpm_list);
@@ -511,7 +511,11 @@ static int device_resume(struct device *dev, pm_message_t state, bool async)
 	dpm_wait(dev->parent, async);
 	device_lock(dev);
 
-	dev->power.in_suspend = false;
+	/*
+	 * This is a fib.  But we'll allow new children to be added below
+	 * a resumed device, even if the device hasn't been completed yet.
+	 */
+	dev->power.is_prepared = false;
 
 	if (dev->pwr_domain) {
 		pm_dev_dbg(dev, state, "power domain ");
@@ -670,7 +674,7 @@ void dpm_complete(pm_message_t state)
 		struct device *dev = to_device(dpm_prepared_list.prev);
 
 		get_device(dev);
-		dev->power.in_suspend = false;
+		dev->power.is_prepared = false;
 		list_move(&dev->power.entry, &list);
 		mutex_unlock(&dpm_list_mtx);
 
@@ -1042,7 +1046,7 @@ int dpm_prepare(pm_message_t state)
 			put_device(dev);
 			break;
 		}
-		dev->power.in_suspend = true;
+		dev->power.is_prepared = true;
 		if (!list_empty(&dev->power.entry))
 			list_move_tail(&dev->power.entry, &dpm_prepared_list);
 		put_device(dev);
diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index e35a17687c05..aa3cc465a601 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -375,7 +375,7 @@ static int usb_unbind_interface(struct device *dev)
 		 * Just re-enable it without affecting the endpoint toggles.
 		 */
 		usb_enable_interface(udev, intf, false);
-	} else if (!error && !intf->dev.power.in_suspend) {
+	} else if (!error && !intf->dev.power.is_prepared) {
 		r = usb_set_interface(udev, intf->altsetting[0].
 				desc.bInterfaceNumber, 0);
 		if (r < 0)
@@ -960,7 +960,7 @@ void usb_rebind_intf(struct usb_interface *intf)
 	}
 
 	/* Try to rebind the interface */
-	if (!intf->dev.power.in_suspend) {
+	if (!intf->dev.power.is_prepared) {
 		intf->needs_binding = 0;
 		rc = device_attach(&intf->dev);
 		if (rc < 0)
@@ -1107,7 +1107,7 @@ static int usb_resume_interface(struct usb_device *udev,
 	if (intf->condition == USB_INTERFACE_UNBOUND) {
 
 		/* Carry out a deferred switch to altsetting 0 */
-		if (intf->needs_altsetting0 && !intf->dev.power.in_suspend) {
+		if (intf->needs_altsetting0 && !intf->dev.power.is_prepared) {
 			usb_set_interface(udev, intf->altsetting[0].
 					desc.bInterfaceNumber, 0);
 			intf->needs_altsetting0 = 0;
diff --git a/include/linux/device.h b/include/linux/device.h
index c66111affca9..553fd37b173b 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -654,13 +654,13 @@ static inline int device_is_registered(struct device *dev)
 
 static inline void device_enable_async_suspend(struct device *dev)
 {
-	if (!dev->power.in_suspend)
+	if (!dev->power.is_prepared)
 		dev->power.async_suspend = true;
 }
 
 static inline void device_disable_async_suspend(struct device *dev)
 {
-	if (!dev->power.in_suspend)
+	if (!dev->power.is_prepared)
 		dev->power.async_suspend = false;
 }
 
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 3160648ccdda..cc536bd80984 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -425,7 +425,7 @@ struct dev_pm_info {
 	pm_message_t		power_state;
 	unsigned int		can_wakeup:1;
 	unsigned int		async_suspend:1;
-	unsigned int		in_suspend:1;	/* Owned by the PM core */
+	bool			is_prepared:1;	/* Owned by the PM core */
 	spinlock_t		lock;
 #ifdef CONFIG_PM_SLEEP
 	struct list_head	entry;
-- 
cgit v1.2.3


From 6d0e0e84f66d32c33511984dd3badd32364b863c Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Sat, 18 Jun 2011 22:42:09 +0200
Subject: PM: Fix async resume following suspend failure

The PM core doesn't handle suspend failures correctly when it comes to
asynchronously suspended devices.  These devices are moved onto the
dpm_suspended_list as soon as the corresponding async thread is
started up, and they remain on the list even if they fail to suspend
or the sleep transition is cancelled before they get suspended.  As a
result, when the PM core unwinds the transition, it tries to resume
the devices even though they were never suspended.

This patch (as1474) fixes the problem by adding a new "is_suspended"
flag to dev_pm_info.  Devices are resumed only if the flag is set.

[rjw:
 * Moved the dev->power.is_suspended check into device_resume(),
   because we need to complete dev->power.completion and clear
   dev->power.is_prepared too for devices whose
   dev->power.is_suspended flags are unset.
 * Fixed __device_suspend() to avoid setting dev->power.is_suspended
   if async_error is different from zero.]

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: stable@kernel.org
---
 drivers/base/power/main.c | 14 ++++++++++++--
 include/linux/pm.h        |  1 +
 2 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index bf5a59ac1957..06f09bf89cb2 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -58,6 +58,7 @@ static int async_error;
 void device_pm_init(struct device *dev)
 {
 	dev->power.is_prepared = false;
+	dev->power.is_suspended = false;
 	init_completion(&dev->power.completion);
 	complete_all(&dev->power.completion);
 	dev->power.wakeup = NULL;
@@ -517,6 +518,9 @@ static int device_resume(struct device *dev, pm_message_t state, bool async)
 	 */
 	dev->power.is_prepared = false;
 
+	if (!dev->power.is_suspended)
+		goto Unlock;
+
 	if (dev->pwr_domain) {
 		pm_dev_dbg(dev, state, "power domain ");
 		error = pm_op(dev, &dev->pwr_domain->ops, state);
@@ -552,6 +556,9 @@ static int device_resume(struct device *dev, pm_message_t state, bool async)
 	}
 
  End:
+	dev->power.is_suspended = false;
+
+ Unlock:
 	device_unlock(dev);
 	complete_all(&dev->power.completion);
 
@@ -839,11 +846,11 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
 	device_lock(dev);
 
 	if (async_error)
-		goto End;
+		goto Unlock;
 
 	if (pm_wakeup_pending()) {
 		async_error = -EBUSY;
-		goto End;
+		goto Unlock;
 	}
 
 	if (dev->pwr_domain) {
@@ -881,6 +888,9 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
 	}
 
  End:
+	dev->power.is_suspended = !error;
+
+ Unlock:
 	device_unlock(dev);
 	complete_all(&dev->power.completion);
 
diff --git a/include/linux/pm.h b/include/linux/pm.h
index cc536bd80984..411e4f4be52b 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -426,6 +426,7 @@ struct dev_pm_info {
 	unsigned int		can_wakeup:1;
 	unsigned int		async_suspend:1;
 	bool			is_prepared:1;	/* Owned by the PM core */
+	bool			is_suspended:1;	/* Ditto */
 	spinlock_t		lock;
 #ifdef CONFIG_PM_SLEEP
 	struct list_head	entry;
-- 
cgit v1.2.3


From c6830c22603aaecf65405af23f6da2d55892f9cb Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Thu, 16 Jun 2011 17:28:07 +0900
Subject: Fix node_start/end_pfn() definition for mm/page_cgroup.c

commit 21a3c96 uses node_start/end_pfn(nid) for detection start/end
of nodes. But, it's not defined in linux/mmzone.h but defined in
/arch/???/include/mmzone.h which is included only under
CONFIG_NEED_MULTIPLE_NODES=y.

Then, we see
  mm/page_cgroup.c: In function 'page_cgroup_init':
  mm/page_cgroup.c:308: error: implicit declaration of function 'node_start_pfn'
  mm/page_cgroup.c:309: error: implicit declaration of function 'node_end_pfn'

So, fixiing page_cgroup.c is an idea...

But node_start_pfn()/node_end_pfn() is a very generic macro and
should be implemented in the same manner for all archs.
(m32r has different implementation...)

This patch removes definitions of node_start/end_pfn() in each archs
and defines a unified one in linux/mmzone.h. It's not under
CONFIG_NEED_MULTIPLE_NODES, now.

A result of macro expansion is here (mm/page_cgroup.c)

for !NUMA
 start_pfn = ((&contig_page_data)->node_start_pfn);
  end_pfn = ({ pg_data_t *__pgdat = (&contig_page_data); __pgdat->node_start_pfn + __pgdat->node_spanned_pages;});

for NUMA (x86-64)
  start_pfn = ((node_data[nid])->node_start_pfn);
  end_pfn = ({ pg_data_t *__pgdat = (node_data[nid]); __pgdat->node_start_pfn + __pgdat->node_spanned_pages;});

Changelog:
 - fixed to avoid using "nid" twice in node_end_pfn() macro.

Reported-and-acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Reported-and-tested-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/include/asm/mmzone.h   |  1 -
 arch/m32r/include/asm/mmzone.h    |  8 +-------
 arch/parisc/include/asm/mmzone.h  |  7 -------
 arch/powerpc/include/asm/mmzone.h |  7 -------
 arch/sh/include/asm/mmzone.h      |  4 ----
 arch/sparc/include/asm/mmzone.h   |  2 --
 arch/tile/include/asm/mmzone.h    | 11 -----------
 arch/x86/include/asm/mmzone_32.h  | 11 -----------
 arch/x86/include/asm/mmzone_64.h  |  3 ---
 include/linux/mmzone.h            |  7 +++++++
 10 files changed, 8 insertions(+), 53 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/include/asm/mmzone.h b/arch/alpha/include/asm/mmzone.h
index 8af56ce346ad..445dc42e0334 100644
--- a/arch/alpha/include/asm/mmzone.h
+++ b/arch/alpha/include/asm/mmzone.h
@@ -56,7 +56,6 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n)
  * Given a kernel address, find the home node of the underlying memory.
  */
 #define kvaddr_to_nid(kaddr)	pa_to_nid(__pa(kaddr))
-#define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
 
 /*
  * Given a kaddr, LOCAL_BASE_ADDR finds the owning node of the memory
diff --git a/arch/m32r/include/asm/mmzone.h b/arch/m32r/include/asm/mmzone.h
index 9f3b5accda88..115ced33febd 100644
--- a/arch/m32r/include/asm/mmzone.h
+++ b/arch/m32r/include/asm/mmzone.h
@@ -14,12 +14,6 @@ extern struct pglist_data *node_data[];
 #define NODE_DATA(nid)		(node_data[nid])
 
 #define node_localnr(pfn, nid)	((pfn) - NODE_DATA(nid)->node_start_pfn)
-#define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
-#define node_end_pfn(nid)						\
-({									\
-	pg_data_t *__pgdat = NODE_DATA(nid);				\
-	__pgdat->node_start_pfn + __pgdat->node_spanned_pages - 1;	\
-})
 
 #define pmd_page(pmd)		(pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
 /*
@@ -44,7 +38,7 @@ static __inline__ int pfn_to_nid(unsigned long pfn)
 	int node;
 
 	for (node = 0 ; node < MAX_NUMNODES ; node++)
-		if (pfn >= node_start_pfn(node) && pfn <= node_end_pfn(node))
+		if (pfn >= node_start_pfn(node) && pfn < node_end_pfn(node))
 			break;
 
 	return node;
diff --git a/arch/parisc/include/asm/mmzone.h b/arch/parisc/include/asm/mmzone.h
index 9608d2cf214a..e67eb9c3d1bf 100644
--- a/arch/parisc/include/asm/mmzone.h
+++ b/arch/parisc/include/asm/mmzone.h
@@ -14,13 +14,6 @@ extern struct node_map_data node_data[];
 
 #define NODE_DATA(nid)          (&node_data[nid].pg_data)
 
-#define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
-#define node_end_pfn(nid)						\
-({									\
-	pg_data_t *__pgdat = NODE_DATA(nid);				\
-	__pgdat->node_start_pfn + __pgdat->node_spanned_pages;		\
-})
-
 /* We have these possible memory map layouts:
  * Astro: 0-3.75, 67.75-68, 4-64
  * zx1: 0-1, 257-260, 4-256
diff --git a/arch/powerpc/include/asm/mmzone.h b/arch/powerpc/include/asm/mmzone.h
index fd3fd58bad84..7b589178be46 100644
--- a/arch/powerpc/include/asm/mmzone.h
+++ b/arch/powerpc/include/asm/mmzone.h
@@ -38,13 +38,6 @@ u64 memory_hotplug_max(void);
 #define memory_hotplug_max() memblock_end_of_DRAM()
 #endif
 
-/*
- * Following are macros that each numa implmentation must define.
- */
-
-#define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
-#define node_end_pfn(nid)	(NODE_DATA(nid)->node_end_pfn)
-
 #else
 #define memory_hotplug_max() memblock_end_of_DRAM()
 #endif /* CONFIG_NEED_MULTIPLE_NODES */
diff --git a/arch/sh/include/asm/mmzone.h b/arch/sh/include/asm/mmzone.h
index 8887baff5eff..15a8496960e6 100644
--- a/arch/sh/include/asm/mmzone.h
+++ b/arch/sh/include/asm/mmzone.h
@@ -9,10 +9,6 @@
 extern struct pglist_data *node_data[];
 #define NODE_DATA(nid)		(node_data[nid])
 
-#define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
-#define node_end_pfn(nid)	(NODE_DATA(nid)->node_start_pfn + \
-				 NODE_DATA(nid)->node_spanned_pages)
-
 static inline int pfn_to_nid(unsigned long pfn)
 {
 	int nid;
diff --git a/arch/sparc/include/asm/mmzone.h b/arch/sparc/include/asm/mmzone.h
index e8c648741ed4..99d9b9f577bf 100644
--- a/arch/sparc/include/asm/mmzone.h
+++ b/arch/sparc/include/asm/mmzone.h
@@ -8,8 +8,6 @@
 extern struct pglist_data *node_data[];
 
 #define NODE_DATA(nid)		(node_data[nid])
-#define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
-#define node_end_pfn(nid)	(NODE_DATA(nid)->node_end_pfn)
 
 extern int numa_cpu_lookup_table[];
 extern cpumask_t numa_cpumask_lookup_table[];
diff --git a/arch/tile/include/asm/mmzone.h b/arch/tile/include/asm/mmzone.h
index c6344c4f32ac..9d3dbce8f953 100644
--- a/arch/tile/include/asm/mmzone.h
+++ b/arch/tile/include/asm/mmzone.h
@@ -40,17 +40,6 @@ static inline int pfn_to_nid(unsigned long pfn)
 	return highbits_to_node[__pfn_to_highbits(pfn)];
 }
 
-/*
- * Following are macros that each numa implmentation must define.
- */
-
-#define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
-#define node_end_pfn(nid)						\
-({									\
-	pg_data_t *__pgdat = NODE_DATA(nid);				\
-	__pgdat->node_start_pfn + __pgdat->node_spanned_pages;		\
-})
-
 #define kern_addr_valid(kaddr)	virt_addr_valid((void *)kaddr)
 
 static inline int pfn_valid(int pfn)
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index 5e83a416eca8..224e8c5eb307 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -48,17 +48,6 @@ static inline int pfn_to_nid(unsigned long pfn)
 #endif
 }
 
-/*
- * Following are macros that each numa implmentation must define.
- */
-
-#define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
-#define node_end_pfn(nid)						\
-({									\
-	pg_data_t *__pgdat = NODE_DATA(nid);				\
-	__pgdat->node_start_pfn + __pgdat->node_spanned_pages;		\
-})
-
 static inline int pfn_valid(int pfn)
 {
 	int nid = pfn_to_nid(pfn);
diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h
index b3f88d7867c7..129d9aa3ceb3 100644
--- a/arch/x86/include/asm/mmzone_64.h
+++ b/arch/x86/include/asm/mmzone_64.h
@@ -13,8 +13,5 @@ extern struct pglist_data *node_data[];
 
 #define NODE_DATA(nid)		(node_data[nid])
 
-#define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
-#define node_end_pfn(nid)       (NODE_DATA(nid)->node_start_pfn +	\
-				 NODE_DATA(nid)->node_spanned_pages)
 #endif
 #endif /* _ASM_X86_MMZONE_64_H */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index c928dac6cad0..9f7c3ebcbbad 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -647,6 +647,13 @@ typedef struct pglist_data {
 #endif
 #define nid_page_nr(nid, pagenr) 	pgdat_page_nr(NODE_DATA(nid),(pagenr))
 
+#define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
+
+#define node_end_pfn(nid) ({\
+	pg_data_t *__pgdat = NODE_DATA(nid);\
+	__pgdat->node_start_pfn + __pgdat->node_spanned_pages;\
+})
+
 #include <linux/memory_hotplug.h>
 
 extern struct mutex zonelists_mutex;
-- 
cgit v1.2.3


From 4d258b25d947521c8b913154db61ec55198243f8 Mon Sep 17 00:00:00 2001
From: Vitaliy Ivanov <vitalivanov@gmail.com>
Date: Mon, 27 Jun 2011 19:07:08 +0300
Subject: Fix some kernel-doc warnings

Fix 'make htmldocs' warnings:

  Warning(/include/linux/hrtimer.h:153): No description found for parameter 'clockid'
  Warning(/include/linux/device.h:604): Excess struct/union/enum/typedef member 'of_match' description in 'device'
  Warning(/include/net/sock.h:349): Excess struct/union/enum/typedef member 'sk_rmem_alloc' description in 'sock'

Signed-off-by: Vitaliy Ivanov <vitalivanov@gmail.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/device.h  | 1 -
 include/linux/hrtimer.h | 1 +
 include/net/sock.h      | 1 -
 3 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/device.h b/include/linux/device.h
index 553fd37b173b..e4f62d8896b7 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -530,7 +530,6 @@ struct device_dma_parameters {
  * @dma_mem:	Internal for coherent mem override.
  * @archdata:	For arch-specific additions.
  * @of_node:	Associated device tree node.
- * @of_match:	Matching of_device_id from driver.
  * @devt:	For creating the sysfs "dev".
  * @devres_lock: Spinlock to protect the resource of the device.
  * @devres_head: The resources list of the device.
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 51932e5acf7c..fd0dc30c9f15 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -135,6 +135,7 @@ struct hrtimer_sleeper {
  * @cpu_base:		per cpu clock base
  * @index:		clock type index for per_cpu support when moving a
  *			timer to a base on another cpu.
+ * @clockid:		clock id for per_cpu support
  * @active:		red black tree root node for the active timers
  * @resolution:		the resolution of the clock, in nanoseconds
  * @get_time:		function to retrieve the current time of the clock
diff --git a/include/net/sock.h b/include/net/sock.h
index f2046e404a61..c0b938cb4b1a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -178,7 +178,6 @@ struct sock_common {
   *	@sk_dst_cache: destination cache
   *	@sk_dst_lock: destination cache lock
   *	@sk_policy: flow policy
-  *	@sk_rmem_alloc: receive queue bytes committed
   *	@sk_receive_queue: incoming packets
   *	@sk_wmem_alloc: transmit queue bytes committed
   *	@sk_write_queue: Packet sending queue
-- 
cgit v1.2.3


From 072441e21ddcd1140606b7d4ef6eab579a86b0b3 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Mon, 27 Jun 2011 16:18:02 -0700
Subject: mm: move shmem prototypes to shmem_fs.h

Before adding any more global entry points into shmem.c, gather such
prototypes into shmem_fs.h.  Remove mm's own declarations from swap.h,
but for now leave the ones in mm.h: because shmem_file_setup() and
shmem_zero_setup() are called from various places, and we should not
force other subsystems to update immediately.

Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/shmem_fs.h | 17 +++++++++++++++++
 include/linux/swap.h     | 10 ----------
 mm/memcontrol.c          |  1 +
 mm/swapfile.c            |  2 +-
 4 files changed, 19 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 2b7fec840517..cae65dc42bcc 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -5,6 +5,13 @@
 #include <linux/mempolicy.h>
 #include <linux/percpu_counter.h>
 
+struct page;
+struct file;
+struct inode;
+struct super_block;
+struct user_struct;
+struct vm_area_struct;
+
 /* inode in-kernel data */
 
 #define SHMEM_NR_DIRECT 16
@@ -45,7 +52,17 @@ static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
 	return container_of(inode, struct shmem_inode_info, vfs_inode);
 }
 
+/*
+ * Functions in mm/shmem.c called directly from elsewhere:
+ */
 extern int init_tmpfs(void);
 extern int shmem_fill_super(struct super_block *sb, void *data, int silent);
+extern struct file *shmem_file_setup(const char *name,
+					loff_t size, unsigned long flags);
+extern int shmem_zero_setup(struct vm_area_struct *);
+extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
+extern int shmem_unuse(swp_entry_t entry, struct page *page);
+extern void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
+					struct page **pagep, swp_entry_t *ent);
 
 #endif
diff --git a/include/linux/swap.h b/include/linux/swap.h
index e70564647039..a273468f8285 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -300,16 +300,6 @@ static inline void scan_unevictable_unregister_node(struct node *node)
 extern int kswapd_run(int nid);
 extern void kswapd_stop(int nid);
 
-#ifdef CONFIG_MMU
-/* linux/mm/shmem.c */
-extern int shmem_unuse(swp_entry_t entry, struct page *page);
-#endif /* CONFIG_MMU */
-
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
-extern void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
-					struct page **pagep, swp_entry_t *ent);
-#endif
-
 #ifdef CONFIG_SWAP
 /* linux/mm/page_io.c */
 extern int swap_readpage(struct page *);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index cf7d027a8844..ddffc74cdebe 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -35,6 +35,7 @@
 #include <linux/limits.h>
 #include <linux/mutex.h>
 #include <linux/rbtree.h>
+#include <linux/shmem_fs.h>
 #include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
diff --git a/mm/swapfile.c b/mm/swapfile.c
index d537d29e9b7b..ff8dc1a18cb4 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -14,7 +14,7 @@
 #include <linux/vmalloc.h>
 #include <linux/pagemap.h>
 #include <linux/namei.h>
-#include <linux/shm.h>
+#include <linux/shmem_fs.h>
 #include <linux/blkdev.h>
 #include <linux/random.h>
 #include <linux/writeback.h>
-- 
cgit v1.2.3


From 94c1e62df4494b79782cb9c7279f827212d1de70 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Mon, 27 Jun 2011 16:18:03 -0700
Subject: tmpfs: take control of its truncate_range

2.6.35's new truncate convention gave tmpfs the opportunity to control
its file truncation, no longer enforced from outside by vmtruncate().
We shall want to build upon that, to handle pagecache and swap together.

Slightly redefine the ->truncate_range interface: let it now be called
between the unmap_mapping_range()s, with the filesystem responsible for
doing the truncate_inode_pages_range() from it - just as the filesystem
is nowadays responsible for doing that from its ->setattr.

Let's rename shmem_notify_change() to shmem_setattr().  Instead of
calling the generic truncate_setsize(), bring that code in so we can
call shmem_truncate_range() - which will later be updated to perform its
own variant of truncate_inode_pages_range().

Remove the punch_hole unmap_mapping_range() from shmem_truncate_range():
now that the COW's unmap_mapping_range() comes after ->truncate_range,
there is no need to call it a third time.

Export shmem_truncate_range() and add it to the list in shmem_fs.h, so
that i915_gem_object_truncate() can call it explicitly in future; get
this patch in first, then update drm/i915 once this is available (until
then, i915 will just be doing the truncate_inode_pages() twice).

Though introduced five years ago, no other filesystem is implementing
->truncate_range, and its only other user is madvise(,,MADV_REMOVE): we
expect to convert it to fallocate(,FALLOC_FL_PUNCH_HOLE,,) shortly,
whereupon ->truncate_range can be removed from inode_operations -
shmem_truncate_range() will help i915 across that transition too.

Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/shmem_fs.h |  1 +
 mm/shmem.c               | 51 +++++++++++++++++++++++++++---------------------
 mm/truncate.c            |  4 ++--
 3 files changed, 32 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index cae65dc42bcc..22a20af4d785 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -61,6 +61,7 @@ extern struct file *shmem_file_setup(const char *name,
 					loff_t size, unsigned long flags);
 extern int shmem_zero_setup(struct vm_area_struct *);
 extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
+extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
 extern int shmem_unuse(swp_entry_t entry, struct page *page);
 extern void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
 					struct page **pagep, swp_entry_t *ent);
diff --git a/mm/shmem.c b/mm/shmem.c
index d221a1cfd7b1..f1714758ea96 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -539,7 +539,7 @@ static void shmem_free_pages(struct list_head *next)
 	} while (next);
 }
 
-static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
+void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	unsigned long idx;
@@ -562,6 +562,8 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
 	spinlock_t *punch_lock;
 	unsigned long upper_limit;
 
+	truncate_inode_pages_range(inode->i_mapping, start, end);
+
 	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
 	idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 	if (idx >= info->next_index)
@@ -738,16 +740,8 @@ done2:
 		 * lowered next_index.  Also, though shmem_getpage checks
 		 * i_size before adding to cache, no recheck after: so fix the
 		 * narrow window there too.
-		 *
-		 * Recalling truncate_inode_pages_range and unmap_mapping_range
-		 * every time for punch_hole (which never got a chance to clear
-		 * SHMEM_PAGEIN at the start of vmtruncate_range) is expensive,
-		 * yet hardly ever necessary: try to optimize them out later.
 		 */
 		truncate_inode_pages_range(inode->i_mapping, start, end);
-		if (punch_hole)
-			unmap_mapping_range(inode->i_mapping, start,
-							end - start, 1);
 	}
 
 	spin_lock(&info->lock);
@@ -766,22 +760,23 @@ done2:
 		shmem_free_pages(pages_to_free.next);
 	}
 }
+EXPORT_SYMBOL_GPL(shmem_truncate_range);
 
-static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
+static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
 {
 	struct inode *inode = dentry->d_inode;
-	loff_t newsize = attr->ia_size;
 	int error;
 
 	error = inode_change_ok(inode, attr);
 	if (error)
 		return error;
 
-	if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)
-					&& newsize != inode->i_size) {
+	if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
+		loff_t oldsize = inode->i_size;
+		loff_t newsize = attr->ia_size;
 		struct page *page = NULL;
 
-		if (newsize < inode->i_size) {
+		if (newsize < oldsize) {
 			/*
 			 * If truncating down to a partial page, then
 			 * if that page is already allocated, hold it
@@ -810,12 +805,19 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
 				spin_unlock(&info->lock);
 			}
 		}
-
-		/* XXX(truncate): truncate_setsize should be called last */
-		truncate_setsize(inode, newsize);
+		if (newsize != oldsize) {
+			i_size_write(inode, newsize);
+			inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+		}
+		if (newsize < oldsize) {
+			loff_t holebegin = round_up(newsize, PAGE_SIZE);
+			unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
+			shmem_truncate_range(inode, newsize, (loff_t)-1);
+			/* unmap again to remove racily COWed private pages */
+			unmap_mapping_range(inode->i_mapping, holebegin, 0, 1);
+		}
 		if (page)
 			page_cache_release(page);
-		shmem_truncate_range(inode, newsize, (loff_t)-1);
 	}
 
 	setattr_copy(inode, attr);
@@ -832,7 +834,6 @@ static void shmem_evict_inode(struct inode *inode)
 	struct shmem_xattr *xattr, *nxattr;
 
 	if (inode->i_mapping->a_ops == &shmem_aops) {
-		truncate_inode_pages(inode->i_mapping, 0);
 		shmem_unacct_size(info->flags, inode->i_size);
 		inode->i_size = 0;
 		shmem_truncate_range(inode, 0, (loff_t)-1);
@@ -2706,7 +2707,7 @@ static const struct file_operations shmem_file_operations = {
 };
 
 static const struct inode_operations shmem_inode_operations = {
-	.setattr	= shmem_notify_change,
+	.setattr	= shmem_setattr,
 	.truncate_range	= shmem_truncate_range,
 #ifdef CONFIG_TMPFS_XATTR
 	.setxattr	= shmem_setxattr,
@@ -2739,7 +2740,7 @@ static const struct inode_operations shmem_dir_inode_operations = {
 	.removexattr	= shmem_removexattr,
 #endif
 #ifdef CONFIG_TMPFS_POSIX_ACL
-	.setattr	= shmem_notify_change,
+	.setattr	= shmem_setattr,
 	.check_acl	= generic_check_acl,
 #endif
 };
@@ -2752,7 +2753,7 @@ static const struct inode_operations shmem_special_inode_operations = {
 	.removexattr	= shmem_removexattr,
 #endif
 #ifdef CONFIG_TMPFS_POSIX_ACL
-	.setattr	= shmem_notify_change,
+	.setattr	= shmem_setattr,
 	.check_acl	= generic_check_acl,
 #endif
 };
@@ -2908,6 +2909,12 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
 	return 0;
 }
 
+void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
+{
+	truncate_inode_pages_range(inode->i_mapping, start, end);
+}
+EXPORT_SYMBOL_GPL(shmem_truncate_range);
+
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 /**
  * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file
diff --git a/mm/truncate.c b/mm/truncate.c
index 5b4c3a4847e9..29a9b8a5a31a 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -619,9 +619,9 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
 	mutex_lock(&inode->i_mutex);
 	down_write(&inode->i_alloc_sem);
 	unmap_mapping_range(mapping, offset, (end - offset), 1);
-	truncate_inode_pages_range(mapping, offset, end);
-	unmap_mapping_range(mapping, offset, (end - offset), 1);
 	inode->i_op->truncate_range(inode, offset, end);
+	/* unmap again to remove racily COWed private pages */
+	unmap_mapping_range(mapping, offset, (end - offset), 1);
 	up_write(&inode->i_alloc_sem);
 	mutex_unlock(&inode->i_mutex);
 
-- 
cgit v1.2.3


From d9d90e5eb70e09903dadff42099b6c948f814050 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Mon, 27 Jun 2011 16:18:04 -0700
Subject: tmpfs: add shmem_read_mapping_page_gfp

Although it is used (by i915) on nothing but tmpfs, read_cache_page_gfp()
is unsuited to tmpfs, because it inserts a page into pagecache before
calling the filesystem's ->readpage: tmpfs may have pages in swapcache
which only it knows how to locate and switch to filecache.

At present tmpfs provides a ->readpage method, and copes with this by
copying pages; but soon we can simplify it by removing its ->readpage.
Provide shmem_read_mapping_page_gfp() now, ready for that transition,

Export shmem_read_mapping_page_gfp() and add it to list in shmem_fs.h,
with shmem_read_mapping_page() inline for the common mapping_gfp case.

(shmem_read_mapping_page_gfp or shmem_read_cache_page_gfp? Generally the
read_mapping_page functions use the mapping's ->readpage, and the
read_cache_page functions use the supplied filler, so I think
read_cache_page_gfp was slightly misnamed.)

Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/shmem_fs.h | 17 ++++++++++-------
 mm/shmem.c               | 23 +++++++++++++++++++++++
 2 files changed, 33 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 22a20af4d785..aa08fa8fd79b 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -3,15 +3,9 @@
 
 #include <linux/swap.h>
 #include <linux/mempolicy.h>
+#include <linux/pagemap.h>
 #include <linux/percpu_counter.h>
 
-struct page;
-struct file;
-struct inode;
-struct super_block;
-struct user_struct;
-struct vm_area_struct;
-
 /* inode in-kernel data */
 
 #define SHMEM_NR_DIRECT 16
@@ -61,9 +55,18 @@ extern struct file *shmem_file_setup(const char *name,
 					loff_t size, unsigned long flags);
 extern int shmem_zero_setup(struct vm_area_struct *);
 extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
+extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
+					pgoff_t index, gfp_t gfp_mask);
 extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
 extern int shmem_unuse(swp_entry_t entry, struct page *page);
 extern void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
 					struct page **pagep, swp_entry_t *ent);
 
+static inline struct page *shmem_read_mapping_page(
+				struct address_space *mapping, pgoff_t index)
+{
+	return shmem_read_mapping_page_gfp(mapping, index,
+					mapping_gfp_mask(mapping));
+}
+
 #endif
diff --git a/mm/shmem.c b/mm/shmem.c
index f1714758ea96..fcedf5464eb7 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3035,3 +3035,26 @@ int shmem_zero_setup(struct vm_area_struct *vma)
 	vma->vm_flags |= VM_CAN_NONLINEAR;
 	return 0;
 }
+
+/**
+ * shmem_read_mapping_page_gfp - read into page cache, using specified page allocation flags.
+ * @mapping:	the page's address_space
+ * @index:	the page index
+ * @gfp:	the page allocator flags to use if allocating
+ *
+ * This behaves as a tmpfs "read_cache_page_gfp(mapping, index, gfp)",
+ * with any new page allocations done using the specified allocation flags.
+ * But read_cache_page_gfp() uses the ->readpage() method: which does not
+ * suit tmpfs, since it may have pages in swapcache, and needs to find those
+ * for itself; although drivers/gpu/drm i915 and ttm rely upon this support.
+ *
+ * Provide a stub for those callers to start using now, then later
+ * flesh it out to call shmem_getpage() with additional gfp mask, when
+ * shmem_file_splice_read() is added and shmem_readpage() is removed.
+ */
+struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
+					 pgoff_t index, gfp_t gfp)
+{
+	return read_cache_page_gfp(mapping, index, gfp);
+}
+EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp);
-- 
cgit v1.2.3


From 507c5f1224014f9956e604ee8703b3bbea7da4a4 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Mon, 27 Jun 2011 16:18:07 -0700
Subject: include/linux/compat.h: declare compat_sys_sendmmsg()

This is required for tilegx to be able to use the compat unistd.h header
where compat_sys_sendmmsg() is now mentioned.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compat.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/compat.h b/include/linux/compat.h
index ddcb7db38e67..846bb1792572 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -467,6 +467,8 @@ asmlinkage long compat_sys_setsockopt(int fd, int level, int optname,
 				      char __user *optval, unsigned int optlen);
 asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg,
 				   unsigned flags);
+asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg,
+				    unsigned vlen, unsigned int flags);
 asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg,
 				   unsigned int flags);
 asmlinkage long compat_sys_recv(int fd, void __user *buf, size_t len,
-- 
cgit v1.2.3


From 08142579b6ca35883c1ed066a2681de6f6917062 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 27 Jun 2011 16:18:10 -0700
Subject: mm: fix assertion mapping->nrpages == 0 in end_writeback()

Under heavy memory and filesystem load, users observe the assertion
mapping->nrpages == 0 in end_writeback() trigger.  This can be caused by
page reclaim reclaiming the last page from a mapping in the following
race:

	CPU0				CPU1
  ...
  shrink_page_list()
    __remove_mapping()
      __delete_from_page_cache()
        radix_tree_delete()
					evict_inode()
					  truncate_inode_pages()
					    truncate_inode_pages_range()
					      pagevec_lookup() - finds nothing
					  end_writeback()
					    mapping->nrpages != 0 -> BUG
        page->mapping = NULL
        mapping->nrpages--

Fix the problem by doing a reliable check of mapping->nrpages under
mapping->tree_lock in end_writeback().

Analyzed by Jay <jinshan.xiong@whamcloud.com>, lost in LKML, and dug out
by Miklos Szeredi <mszeredi@suse.de>.

Cc: Jay <jinshan.xiong@whamcloud.com>
Cc: Miklos Szeredi <mszeredi@suse.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/inode.c         | 7 +++++++
 include/linux/fs.h | 1 +
 mm/truncate.c      | 5 +++++
 3 files changed, 13 insertions(+)

(limited to 'include')

diff --git a/fs/inode.c b/fs/inode.c
index 0f7e88a7803f..43566d17d1b8 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -423,7 +423,14 @@ EXPORT_SYMBOL(remove_inode_hash);
 void end_writeback(struct inode *inode)
 {
 	might_sleep();
+	/*
+	 * We have to cycle tree_lock here because reclaim can be still in the
+	 * process of removing the last page (in __delete_from_page_cache())
+	 * and we must not free mapping under it.
+	 */
+	spin_lock_irq(&inode->i_data.tree_lock);
 	BUG_ON(inode->i_data.nrpages);
+	spin_unlock_irq(&inode->i_data.tree_lock);
 	BUG_ON(!list_empty(&inode->i_data.private_list));
 	BUG_ON(!(inode->i_state & I_FREEING));
 	BUG_ON(inode->i_state & I_CLEAR);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6e73e2e9ae33..b5b979247863 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -639,6 +639,7 @@ struct address_space {
 	struct prio_tree_root	i_mmap;		/* tree of private and shared mappings */
 	struct list_head	i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
 	struct mutex		i_mmap_mutex;	/* protect tree, count, list */
+	/* Protected by tree_lock together with the radix tree */
 	unsigned long		nrpages;	/* number of total pages */
 	pgoff_t			writeback_index;/* writeback starts here */
 	const struct address_space_operations *a_ops;	/* methods */
diff --git a/mm/truncate.c b/mm/truncate.c
index 29a9b8a5a31a..e13f22efaad7 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -304,6 +304,11 @@ EXPORT_SYMBOL(truncate_inode_pages_range);
  * @lstart: offset from which to truncate
  *
  * Called under (and serialised by) inode->i_mutex.
+ *
+ * Note: When this function returns, there can be a page in the process of
+ * deletion (inside __delete_from_page_cache()) in the specified range.  Thus
+ * mapping->nrpages can be non-zero when this function returns even after
+ * truncation of the whole mapping.
  */
 void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
 {
-- 
cgit v1.2.3


From 957c665f37007de93ccbe45902a23143724170d0 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 24 Jun 2011 15:25:00 -0700
Subject: ipv6: Don't put artificial limit on routing table size.

IPV6, unlike IPV4, doesn't have a routing cache.

Routing table entries, as well as clones made in response
to route lookup requests, all live in the same table.  And
all of these things are together collected in the destination
cache table for ipv6.

This means that routing table entries count against the garbage
collection limits, even though such entries cannot ever be reclaimed
and are added explicitly by the administrator (rather than being
created in response to lookups).

Therefore it makes no sense to count ipv6 routing table entries
against the GC limits.

Add a DST_NOCOUNT destination cache entry flag, and skip the counting
if it is set.  Use this flag bit in ipv6 when adding routing table
entries.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h |  1 +
 net/core/dst.c    |  6 ++++--
 net/ipv6/route.c  | 13 +++++++------
 3 files changed, 12 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/dst.h b/include/net/dst.h
index 7d15d238b6ec..e12ddfb9eb16 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -77,6 +77,7 @@ struct dst_entry {
 #define DST_NOPOLICY		0x0004
 #define DST_NOHASH		0x0008
 #define DST_NOCACHE		0x0010
+#define DST_NOCOUNT		0x0020
 	union {
 		struct dst_entry	*next;
 		struct rtable __rcu	*rt_next;
diff --git a/net/core/dst.c b/net/core/dst.c
index 9ccca038444f..6135f3671692 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -190,7 +190,8 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
 	dst->lastuse = jiffies;
 	dst->flags = flags;
 	dst->next = NULL;
-	dst_entries_add(ops, 1);
+	if (!(flags & DST_NOCOUNT))
+		dst_entries_add(ops, 1);
 	return dst;
 }
 EXPORT_SYMBOL(dst_alloc);
@@ -243,7 +244,8 @@ again:
 		neigh_release(neigh);
 	}
 
-	dst_entries_add(dst->ops, -1);
+	if (!(dst->flags & DST_NOCOUNT))
+		dst_entries_add(dst->ops, -1);
 
 	if (dst->ops->destroy)
 		dst->ops->destroy(dst);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c2af4da074b0..0ef1f086feb8 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -228,9 +228,10 @@ static struct rt6_info ip6_blk_hole_entry_template = {
 
 /* allocate dst with ip6_dst_ops */
 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
-					     struct net_device *dev)
+					     struct net_device *dev,
+					     int flags)
 {
-	struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, 0);
+	struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
 
 	memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
 
@@ -1042,7 +1043,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 	if (unlikely(idev == NULL))
 		return NULL;
 
-	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev);
+	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
 	if (unlikely(rt == NULL)) {
 		in6_dev_put(idev);
 		goto out;
@@ -1206,7 +1207,7 @@ int ip6_route_add(struct fib6_config *cfg)
 		goto out;
 	}
 
-	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL);
+	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
 
 	if (rt == NULL) {
 		err = -ENOMEM;
@@ -1726,7 +1727,7 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
 {
 	struct net *net = dev_net(ort->rt6i_dev);
 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
-					    ort->dst.dev);
+					    ort->dst.dev, 0);
 
 	if (rt) {
 		rt->dst.input = ort->dst.input;
@@ -2005,7 +2006,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 {
 	struct net *net = dev_net(idev->dev);
 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
-					    net->loopback_dev);
+					    net->loopback_dev, 0);
 	struct neighbour *neigh;
 
 	if (rt == NULL) {
-- 
cgit v1.2.3